##############################################
message("Basic setup")

rm(list=ls())
set.seed(1234) 
options(scipen=999, digits=5)

##############################################
message("required libraries and setwd")

# Load/install packages --
if (!require("pacman")) install.packages("pacman")
pacman::p_load(
  plyr,
  ggplot2,
  reshape2,
  zoo,
  sandwich,
  AER,
  xtable,
  stats,
  tidyr,
  dplyr,
  weights,
  estimatr,
  forcats,
  sf,
  ggpubr
)

#Set wd ---
home <-dirname(rstudioapi::getActiveDocumentContext()$path)
setwd(home)

##############################################
message("load data, functions, etc.")

load("panel_taxtime.Rda")
load("panel_goodtaxpayer.Rda")
load("naturalex_debt_gtp.Rda")
load("fieldex_data.Rda")
load("survey_data.Rda")

source("t_test.R")
source("test_diff.R")


#for maps

#Data for "Field Experiment: Geographic Distribution of Eligible and Ineligible Taxpayers"
datafx_map<-read_sf("v_mdg_parcelas_field_PROP_BP.shp")
shp<-read_sf("Marco2011_SEG_Montevideo_Total.shp")

#Data for "Natural Experiment: Property Plots of Winning Account Numbers"
naturalex_data_map<-read_sf("naturalex_map.shp")
shp2<-read_sf("v_mdg_manzanas.shp")

#Data for Reward Programs for Good Taxpayers in Latin America
latam_map_data<-read_sf("latinamerica.shp")


##############################################
message("Clean & add new variables")

#cross section natural experiment data
naturalex<-buen_pagador_panel[buen_pagador_panel$centered==0,]

# New variables for the natural experiment data
taxes_panel$YEARMON_LOTT <- as.yearmon(taxes_panel$FECHA_SORTEO2) #year & month of the lottery
taxes_panel$YEAR_LOTT <- as.numeric(format(taxes_panel$YEARMON_LOTT, "%Y")) # year of the lottery
taxes_panel$missed_payment <- as.numeric(taxes_panel$en_fecha==0) #missed payment dummy
taxes_panel$compliance <- as.numeric(taxes_panel$nr_paymntsowed==0) #compliance dummy

# save data in the control group
control<-taxes_panel %>% filter(TREATMENT==0)

# add tax names in english
taxes_panel$tax <- taxes_panel$TRIBUTO
taxes_panel$tax <- as.factor(taxes_panel$tax)
levels(taxes_panel$tax) <-  c("Property", "Vehicle", "Sewage", "Head")

# dummy if it is a Vehicle tax o other tax
taxes_panel$holiday_type <- 1
taxes_panel$holiday_type[taxes_panel$TRIBUTO=="Patente de Rodados"] <- 0

# st & t_st variables (time standardized by payments of relevant lottery) ---
holiday <- taxes_panel[taxes_panel$cuota_exonerada==1,] #treated subset

#Contribucion Inmobiliaria
CI <- c(min(holiday$t[holiday$TRIBUTO=="Contribucion Inmobiliaria"],na.rm = T),
        max(holiday$t[holiday$TRIBUTO=="Contribucion Inmobiliaria"],na.rm = T))

taxes_panel <- taxes_panel[!(taxes_panel$TRIBUTO=="Contribucion Inmobiliaria" &
                               taxes_panel$ES_BP==1 &
                               taxes_panel$t>=CI[1] & taxes_panel$t<=CI[2]),]
taxes_panel$st[taxes_panel$TRIBUTO=="Contribucion Inmobiliaria"] <- CI[2] - 3
taxes_panel <- taxes_panel[!(taxes_panel$TRIBUTO=="Contribucion Inmobiliaria" &
                               taxes_panel$ES_BP==0 &
                               taxes_panel$t>=4 & taxes_panel$t<=CI[2]),]

#Patente de Rodados
PR <- c(min(holiday$t[holiday$TRIBUTO=="Patente de Rodados"],na.rm = T),
        max(holiday$t[holiday$TRIBUTO=="Patente de Rodados"],na.rm = T))
taxes_panel <- taxes_panel[!(taxes_panel$TRIBUTO=="Patente de Rodados" &
                               taxes_panel$ES_BP==1 &
                               taxes_panel$t>=PR[1] & taxes_panel$t<=PR[2]),]
taxes_panel$st[taxes_panel$TRIBUTO=="Patente de Rodados"] <- 0

#Saneamiento
TS <- c(min(holiday$t[holiday$TRIBUTO=="Saneamiento"],na.rm = T),
        max(holiday$t[holiday$TRIBUTO=="Saneamiento"],na.rm = T))
taxes_panel <- taxes_panel[!(taxes_panel$TRIBUTO=="Saneamiento" &
                               taxes_panel$ES_BP==1 &
                               taxes_panel$t>=TS[1] & taxes_panel$t<=TS[2]),]
taxes_panel$st[taxes_panel$TRIBUTO=="Saneamiento"] <- TS[2] - 3
taxes_panel <- taxes_panel[!(taxes_panel$TRIBUTO=="Saneamiento" &
                               taxes_panel$ES_BP==0 &
                               taxes_panel$t>=4 & taxes_panel$t<=TS[2]),]

#Tributos Domiciliarios
TD <- c(1,max(holiday$t[holiday$TRIBUTO=="Tributos Domiciliarios"],na.rm = T))
taxes_panel <- taxes_panel[!(taxes_panel$TRIBUTO=="Tributos Domiciliarios" &
                               taxes_panel$ES_BP==1 &
                               taxes_panel$t>=TD[1] & taxes_panel$t<=TD[2]),]
taxes_panel$st[taxes_panel$TRIBUTO=="Tributos Domiciliarios"] <- TD[2] - 3
taxes_panel <- taxes_panel[!(taxes_panel$TRIBUTO=="Tributos Domiciliarios" &
                               taxes_panel$ES_BP==0 &
                               taxes_panel$t>=4 & taxes_panel$t<=TD[2]),]

taxes_panel$t_st <- ifelse((taxes_panel$ES_BP==1 & taxes_panel$t > 0) |
                             (taxes_panel$ES_BP==0 & taxes_panel$t > 4),
                           taxes_panel$t - taxes_panel$st, taxes_panel$t)
rm(holiday, CI, TD, TS, PR)



# add variables for the field experiment data

#treatment names variable
fieldex$tpooled <- NA
fieldex$tpooled[fieldex$treatment==6] <- "Control"
fieldex$tpooled[fieldex$treatment==0] <- "Reminder"
fieldex$tpooled[fieldex$treatment %in% c(1,2,4)] <- "Reminder+Info"


################################################################################
# PAPER Tables & Figures
################################################################################


##############################################
message("MAIN PAPER: Figure 1")

set.seed(12345)

## SIMULATION
z <- 2 #tax payment
p <- 1 #probability of punishment
c <- 1 #cost of punishment
b <- .05 #intrinsic benefit of compliance

# function for the decision to comply in a single period
sim_function <- function(z, b, p, c, theta, lambda0){
  
  lambda <- NA
  for(i in 1:50){
    v <- rnorm(1) #random noise
    vector_theta <- (theta^((i+49):1))
    vector_lambda <- if(i==1){lambda0}else{c(lambda0, lambda)}
    lambda[i] <- ifelse((b + (p*c) - z + (1/5000)*z +
                           vector_theta %*% vector_lambda - v) > 0, 1, 0)}
  
  return(lambda)
}

# complete iteration including shock
sim_function1000 <- function(z, b, p,  c, theta, history, N, treat){
  sims <- matrix(NA, N, 50)
  for(i in 1:N){
    if (history=="perfect"){lambda0 <- rbinom(50,1,prob=1)}
    if (history=="marginal"){lambda0 <- rbinom(50,1,prob=.4)}
    if (treat==1) {lambda0[48:50] <- c(0, 0, 0)}
    sims[i, ] <- sim_function(z=z, b=b, p=p, c=c, theta=theta, lambda0=lambda0)}
  sims <- apply(sims, 2, mean)
  return(sims)
}

paym <- rbind(
  cbind(sim_function1000(z=z, b=b, p=p, c=c, theta=0.7, history = "perfect",
                         treat=1, N=10000), "Perfect Past Complier"),
  cbind(sim_function1000(z=z, b=b, p=p, c=c, theta=0.7, history = "marginal",
                         treat=1, N=10000), "Imperfect Past Complier"))

paym <- as.data.frame(paym)
paym$t <- c(1:50, 1:50)
names(paym)[1:2] <- c("mean_payments","type")
paym$type <- as.factor(paym$type)
paym$mean_payments <- as.numeric(as.character(paym$mean_payments))

#Figure
ggplot(paym, aes(x=t, y=mean_payments, group=type)) + 
  geom_line(aes(linetype = type)) + 
  theme_bw() + ylim(0, 1) + 
  ylab("Average Rate of Compliance") +
  xlab(expression(paste("Payment Periods ", italic("(t)")))) + 
  labs(shape = "Taxpayer Type") +
  theme(legend.position = "bottom")

##############################################
message("MAIN PAPER: Figure 2")

# Paid on time figure
control %>% # We use only observations in the control group
  # group by year and take the mean of paid on time 
  group_by(YEAR_LOTT) %>% 
  dplyr::summarise(
    mean = mean(en_fecha, na.rm=T),
    outcome = "Paid Bill On Time"  
  ) %>% 
  ggplot(aes(YEAR_LOTT, mean)) + 
  facet_wrap(~ outcome, scales="free_y") +
  geom_point(size=2) +
  geom_line(size=1) +
  xlab("Year") +
  ylab("Mean") +
  theme_bw() + ylim(c(0,.75)) +
  scale_colour_manual(values = c("black")) +
  theme(plot.title = element_text(size = rel(1.75)),
        axis.text.y = element_text(size = rel(1.25)),
        axis.title.y = element_text(size = rel(1.3)),
        axis.title.x = element_text(size = rel(1.3)),
        legend.text = element_text(size = rel(1.2)),
        strip.text.x = element_text(size = rel(1.4)),
        strip.text.y = element_text(size = rel(1.4)),
        strip.background = element_rect(size = 1.5),
        axis.text.x = element_text(size = rel(1.25), angle = 90, vjust = 0.5, hjust=1),
        legend.position = "bottom",
        legend.title=element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black"))

# Accumulated Debt figure
control %>% # We use only observations in the control group
  # group by year and take the mean of accumulated debt
  group_by(YEAR_LOTT) %>% 
  dplyr::summarise(
    mean = mean(nr_paymntsowed, na.rm=T),
    outcome = "Accumulated Debt\n(Payments Owed)"
  ) %>% 
  ggplot(aes(YEAR_LOTT, mean)) + 
  facet_wrap(~ outcome, scales="free_y") +
  geom_point(size=2) +
  geom_line(size=1) +
  xlab("Year") +
  ylab("Mean") +
  theme_bw() + ylim(c(0,8)) +
  scale_colour_manual(values = c("black")) +
  theme(plot.title = element_text(size = rel(1.75)),
        axis.text.y = element_text(size = rel(1.25)),
        axis.title.y = element_text(size = rel(1.3)),
        axis.title.x = element_text(size = rel(1.3)),
        legend.text = element_text(size = rel(1.2)),
        strip.text.x = element_text(size = rel(1.4)),
        strip.text.y = element_text(size = rel(1.4)),
        strip.background = element_rect(size = 1.5),
        axis.text.x = element_text(size = rel(1.25), angle = 90, vjust = 0.5, hjust=1),
        legend.position = "bottom",
        legend.title=element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black"))


# Good taxpayer figure
control %>% # We use only observations in the control group
  filter(t==0) %>%  # We keep only observations for t=0
  # Share of good taxpayer by year
  group_by(YEAR_LOTT) %>% 
  dplyr::summarise(
    mean = mean(ES_BP, na.rm=T),
    outcome = "Good Taxpayer"
  ) %>% 
  ggplot(aes(YEAR_LOTT, mean)) + 
  facet_wrap(~ outcome, scales="free_y") +
  geom_point(size=2) +
  geom_line(size=1) +
  xlab("Year") +
  ylab("Mean") +
  theme_bw() + ylim(c(0,.4)) +
  scale_colour_manual(values = c("black")) +
  theme(plot.title = element_text(size = rel(1.75)),
        axis.text.y = element_text(size = rel(1.25)),
        axis.title.y = element_text(size = rel(1.3)),
        axis.title.x = element_text(size = rel(1.3)),
        legend.text = element_text(size = rel(1.2)),
        strip.text.x = element_text(size = rel(1.4)),
        strip.text.y = element_text(size = rel(1.4)),
        strip.background = element_rect(size = 1.5),
        axis.text.x = element_text(size = rel(1.25), angle = 90, vjust = 0.5, hjust=1),
        legend.position = "bottom",
        legend.title=element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black"))


##############################################
message("MAIN PAPER: Table 3.1")

# Natural Experiment: Sample Sizes

# Sample sizes by Tax & Taxpayer Type
naturalex %>% # keep only the cross-section of observations at the time of treatment assignment
  dplyr::group_by(ES_BP, TRIBUTO) %>% 
  dplyr::summarize(
    non_winning_accounts = sum(TREATMENT==0),
    winning_accounts = sum(TREATMENT==1),
    study_group = n()
  )

# Sample sizes by Taxpayer Type
naturalex %>% # keep only the cross-section of observations at the time of treatment assignment
  dplyr::group_by(ES_BP) %>% 
  dplyr::summarize(
    non_winning_accounts = sum(TREATMENT==0),
    winning_accounts = sum(TREATMENT==1),
    study_group = n()
  )

##############################################
message("MAIN PAPER: Table 3.2")

# table by treatment and taxpayer type
table(fieldex$tpooled, fieldex$type)

# table by taxpayer type
table(fieldex$type)

##############################################
message("MAIN PAPER: Figure 3")

# The Negative Impact of Holidays on Compliance

table(taxes_panel$TREATMENT, taxes_panel$TIENE_EXO)

t <- -10:13
btp <- NULL
gtp <- NULL

for (i in t){
  
  temp <- taxes_panel[taxes_panel$t_st == i, ]
  
  # For Ineligible Taxpayers
  btp_est <- tidy(lm_robust(en_fecha ~ TREATMENT, 
                            data = filter(temp, ES_BP==0)))[2,]
  btp <- rbind.data.frame(btp, cbind.data.frame(i, "Ineligible Taxpayers", btp_est))
  
  # For Eligible Taxpayers
  # For good taxpayers, skip periods under the tax holiday
  if (nrow(temp[temp$TREATMENT==1 & temp$ES_BP==1,])==0) next
  
  gtp_est <- tidy(iv_robust(en_fecha ~ TIENE_EXO | TREATMENT, 
                            data = filter(temp, ES_BP==1)))[2,]
  gtp <- rbind.data.frame(gtp, cbind.data.frame(i, "Eligible Taxpayers", gtp_est))
  
}

names(gtp)[2] <- names(btp)[2] <- "type"
plot <- rbind.data.frame(gtp, btp); rm(gtp, btp)


#Figure
ggplot(plot, aes(x=i, y=estimate, shape=type)) + 
  facet_grid(type~.) + 
  geom_rect(data=NULL,aes(xmin=0, xmax=4, ymin=-Inf, ymax=Inf),
            fill="gray80", color="gray80") +
  geom_errorbar(aes(x=i,
                    ymin=conf.low, 
                    ymax=conf.high), 
                width=.6, size=.8, position = position_dodge(width = 0.6)) +
  geom_point(size=4, position = position_dodge(width = 0.6)) +
  xlab("Payments Since Tax Holiday") +
  ylab("Paid on Time (CACE)") +
  geom_vline(aes(xintercept=0), size=.7) +
  geom_hline(aes(yintercept=0), size=.5, linetype="dashed") +
  theme_bw() +
  scale_x_discrete(limit = t[!t %in% c(1,2,3)], 
                   labels = as.character(c(t[t<1],
                                           t[!t%in%c(1,2,3) & t>0]-3))) +
  theme(plot.title = element_text(size = rel(1.75)),
        axis.text.y = element_text(size = rel(1.25)),
        axis.title.y = element_text(size = rel(1.3)),
        axis.title.x = element_text(size = rel(1.3)),
        legend.text = element_text(size = rel(1.2)),
        strip.text.x = element_text(size = rel(1.4)),
        strip.text.y = element_text(size = rel(1.4)),
        strip.background = element_rect(size = 1.5),
        axis.text.x = element_text(size = rel(1.1), hjust=.7),
        legend.position = "bottom",
        legend.title=element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black"))


##############################################
message("MAIN PAPER: Table 4.3")
# Estimated Causal Effects of Tax Holidays

# estimate for 1st period after holiday (t_st==4)
t1_ace <- tidy(lm_robust(en_fecha ~ TREATMENT, 
                         data = filter(taxes_panel, ES_BP==1 & t_st==4)))
t1_cace <- tidy(iv_robust(en_fecha ~ TIENE_EXO | TREATMENT, 
                          data = filter(taxes_panel, ES_BP==1 & t_st==4)))
t1 <- c(t1_ace[1,2], t1_ace[2,2], t1_ace[2,3], t1_ace[2,5], t1_cace[2,2],t1_cace[2,3],t1_cace[2,5]); 
rm(t1_ace, t1_cace)

# estimate for 5th period after holiday (t_st==8)
t5_ace <- tidy(lm_robust(en_fecha ~ TREATMENT, 
                         data = filter(taxes_panel, ES_BP==1 & t_st==8)))
t5_cace <- tidy(iv_robust(en_fecha ~ TIENE_EXO | TREATMENT, 
                          data = filter(taxes_panel, ES_BP==1 & t_st==8)))
t5 <- c(t5_ace[1,2], t5_ace[2,2], t5_ace[2,3], t5_ace[2,5], t5_cace[2,2],t5_cace[2,3],t5_cace[2,5]); 
rm(t5_ace, t5_cace)

# estimate for 10th period after holiday (t_st==13)
t10_ace <- tidy(lm_robust(en_fecha ~ TREATMENT, 
                          data = filter(taxes_panel, ES_BP==1 & t_st==13)))
t10_cace <- tidy(iv_robust(en_fecha ~ TIENE_EXO | TREATMENT, 
                           data = filter(taxes_panel, ES_BP==1 & t_st==13)))
t10 <- c(t10_ace[1,2], t10_ace[2,2], t10_ace[2,3], t10_ace[2,5], t10_cace[2,2],t10_cace[2,3],t10_cace[2,5]); 
rm(t10_ace, t10_cace)

# estimate for periods 1-10 after holiday (t_st==4-13)
t1_10_ace <- tidy(lm_robust(en_fecha ~ TREATMENT, 
                            data = filter(taxes_panel, ES_BP==1 & t_st>3 & t_st<14)))
t1_10_cace <- tidy(iv_robust(en_fecha ~ TIENE_EXO | TREATMENT, 
                             data = filter(taxes_panel, ES_BP==1 & t_st>3 & t_st<14)))
t1_10 <- c(t1_10_ace[1,2], t1_10_ace[2,2], t1_10_ace[2,3], t1_10_ace[2,5],
           t1_10_cace[2,2],t1_10_cace[2,3],t1_10_cace[2,5]); 
rm(t1_10_ace, t1_10_cace)


# estimate for periods 1-10 after holiday (t_st==4-13) PROPERTY TAX
t1_10_ace <- tidy(lm_robust(en_fecha ~ TREATMENT, 
                            data = filter(taxes_panel, ES_BP==1 & t_st>3 & t_st<14 &
                                            TRIBUTO == "Contribucion Inmobiliaria")))
t1_10_cace <- tidy(iv_robust(en_fecha ~ TIENE_EXO | TREATMENT, 
                             data = filter(taxes_panel, ES_BP==1 & t_st>3 & t_st<14 &
                                             TRIBUTO == "Contribucion Inmobiliaria")))
t1_10_property <- c(t1_10_ace[1,2], t1_10_ace[2,2], t1_10_ace[2,3], t1_10_ace[2,5],t1_10_cace[2,2],t1_10_cace[2,3],t1_10_cace[2,5]); 
rm(t1_10_ace, t1_10_cace)


# estimate for periods 1-10 after holiday (t_st==4-13) HEAD
t1_10_ace <- tidy(lm_robust(en_fecha ~ TREATMENT, 
                            data = filter(taxes_panel, ES_BP==1 & t_st>3 & t_st<14 &
                                            TRIBUTO == "Tributos Domiciliarios")))
t1_10_cace <- tidy(iv_robust(en_fecha ~ TIENE_EXO | TREATMENT, 
                             data = filter(taxes_panel, ES_BP==1 & t_st>3 & t_st<14 &
                                             TRIBUTO == "Tributos Domiciliarios")))
t1_10_head <- c(t1_10_ace[1,2], t1_10_ace[2,2], t1_10_ace[2,3], t1_10_ace[2,5], t1_10_cace[2,2],t1_10_cace[2,3],t1_10_cace[2,5]); 
rm(t1_10_ace, t1_10_cace)


# estimate for periods 1-10 after holiday (t_st==4-13) SEWAGE
t1_10_ace <- tidy(lm_robust(en_fecha ~ TREATMENT, 
                            data = filter(taxes_panel, ES_BP==1 & t_st>3 & t_st<14 &
                                            TRIBUTO == "Saneamiento")))
t1_10_cace <- tidy(iv_robust(en_fecha ~ TIENE_EXO | TREATMENT, 
                             data = filter(taxes_panel, ES_BP==1 & t_st>3 & t_st<14 &
                                             TRIBUTO == "Saneamiento")))
t1_10_sewage <- c(t1_10_ace[1,2], t1_10_ace[2,2], t1_10_ace[2,3], t1_10_ace[2,5], 
                  t1_10_cace[2,2],t1_10_cace[2,3],t1_10_cace[2,5]); 
rm(t1_10_ace, t1_10_cace)


# estimate for periods 1-10 after holiday (t_st==4-13) VEHICLE
t1_10_ace <- tidy(lm_robust(en_fecha ~ TREATMENT, 
                            data = filter(taxes_panel, ES_BP==1 & t_st>3 & t_st<14 &
                                            TRIBUTO == "Patente de Rodados")))
t1_10_cace <- tidy(iv_robust(en_fecha ~ TIENE_EXO | TREATMENT, 
                             data = filter(taxes_panel, ES_BP==1 & t_st>3 & t_st<14 &
                                             TRIBUTO == "Patente de Rodados")))
t1_10_vehicle <- c(t1_10_ace[1,2], t1_10_ace[2,2], t1_10_ace[2,3], t1_10_ace[2,5], 
                   t1_10_cace[2,2],t1_10_cace[2,3],t1_10_cace[2,5]); 
rm(t1_10_ace, t1_10_cace)



# Build table
table4.3 <- rbind.data.frame(t1,t5,t10,t1_10, t1_10_property, t1_10_head, 
                             t1_10_sewage, t1_10_vehicle);
rm(t1,t5,t10,t1_10, t1_10_property, t1_10_head, t1_10_sewage, t1_10_vehicle)

rownames(table4.3) <- c("Post Tax Holiday Payment 1",
                        "Post Tax Holiday Payment 5",
                        "Post Tax Holiday Payment 10",
                        "Post Tax Holiday Payments 1-10",
                        "Post Tax Holiday Payments 1-10 (Property)",
                        "Post Tax Holiday Payments 1-10 (Head)",
                        "Post Tax Holiday Payments 1-10 (Sewage)",
                        "Post Tax Holiday Payments 1-10 (Vehicle)")

colnames(table4.3) <- c("Control Mean", "ACE", "SE_ACE", "p-value", "CACE","SE_CACE", "p-value")


# Table
round(table4.3,digits = 2)

##############################################
message("MAIN PAPER: Figure 4")

# Treatment Effects By Type of Tax: Holiday vs. No Holiday


gtp_taxes <- taxes_panel[taxes_panel$ES_BP==1, ]
t <- unique(gtp_taxes$t_st)
t <- t[order(t)]
t <- t[t>-11 & t<=21]

gtp_plot <- NULL

#No Holiday
for (i in 1:length(t)){
  
  temp <- gtp_taxes[gtp_taxes$t_st == t[i], ]
  
  if (nrow(temp[temp$TREATMENT==1 & temp$holiday_type==0,])==0) next
  if (nrow(temp[temp$TREATMENT==0 & temp$holiday_type==0,])==0) next
  
  on_time_noholiday <- robust.se(ivreg(en_fecha ~ TIENE_EXO, ~ TREATMENT, 
                                       data=temp[temp$holiday_type==0,]))[2,1:2]
  
  gtp_plot <- rbind(gtp_plot, 
                    as.vector(c("Paid on Time", t[i], on_time_noholiday, "No Holiday")))
  
  print(i)
}

#Holiday
for (i in 1:length(t)){
  
  temp <- gtp_taxes[gtp_taxes$t_st == t[i], ]
  
  if (nrow(temp[temp$TREATMENT==1 & temp$holiday_type==1,])==0) next
  if (nrow(temp[temp$TREATMENT==0 & temp$holiday_type==1,])==0) next
  
  on_time_holiday <- robust.se(ivreg(en_fecha ~ TIENE_EXO, ~ TREATMENT, 
                                     data=temp[temp$holiday_type==1,]))[2,1:2]
  
  gtp_plot <- rbind(gtp_plot, 
                    as.vector(c("Paid on Time", t[i], on_time_holiday, "Holiday")))
  print(i)
}

gtp_plot <- as.data.frame(gtp_plot)
names(gtp_plot) <- c("outcome", "t", "CACE", "SE", "lottery_type")

gtp_plot$t <- as.numeric(as.character(gtp_plot$t))
gtp_plot$CACE <- as.numeric(as.character(gtp_plot$CACE))
gtp_plot$SE <- as.numeric(as.character(gtp_plot$SE))


#Difference
diff.hte <- NULL

for (i in 1:length(t)){
  
  temp <- gtp_plot[gtp_plot$t==t[i],]
  
  if (nrow(temp)!=2) next
  
  diff <- temp$CACE[temp$lottery_type=="Holiday"] - temp$CACE[temp$lottery_type=="No Holiday"]
  
  SE <- sqrt((temp$SE[temp$lottery_type=="Holiday"])^2 + (temp$CACE[temp$lottery_type=="No Holiday"])^2)
  
  diff.hte <- rbind.data.frame(diff.hte, as.vector(c(t[i], diff, SE)))
  
  print(i)
}

names(diff.hte) <- c("t", "CACE", "SE")
diff.hte1 <- diff.hte
diff.hte$lottery_type <- "Difference"
diff.hte$outcome <- "Paid on Time"


gtp_plot <- rbind.data.frame(gtp_plot, diff.hte)

gtp_plot$upper <- gtp_plot$CACE + qnorm(.975) * gtp_plot$SE 
gtp_plot$lower <- gtp_plot$CACE - qnorm(.975) * gtp_plot$SE

gtp_plot$lottery_type <- as.factor(gtp_plot$lottery_type)
gtp_plot$lottery_type <- relevel(gtp_plot$lottery_type, ref= "No Holiday")
gtp_plot$lottery_type <- relevel(gtp_plot$lottery_type, ref= "Holiday")

# Calculate the p-value for the difference (one and two-tailed tests)

gtp_plot$t.stat <- gtp_plot$CACE/gtp_plot$SE
gtp_plot$p_value.2 <- 2 * (1 - pnorm(abs(gtp_plot$t.stat)))  # Two-tailed test
gtp_plot$p_value.1 <- (1 - pnorm(abs(gtp_plot$t.stat)))  # One-tailed test
gtp_plot$stars <- ifelse(gtp_plot$p_value.1<=.05 & 
                           gtp_plot$t>0, "+", " ") # add one-tailed for post-treatment periods

#Figure
p <- ggplot(gtp_plot[gtp_plot$t<14,], aes(x=t, y=CACE))
p + facet_grid(lottery_type~.) + 
  geom_rect(data=NULL,aes(xmin=0, xmax=4, ymin=-Inf, ymax=Inf),
            fill="gray80", color="gray80") + 
  geom_hline(aes(yintercept=0), size=.5, linetype="dashed", color="gray60") +
  geom_text(aes(x = t, y = (upper + .05), label = stars), size = 5) + 
  geom_errorbar(aes(x=t,
                    ymin=lower, 
                    ymax=upper), 
                width=.1, size=.8, position = position_dodge(width = 0.5)) +
  geom_point(size=2.5, position = position_dodge(width = 0.5)) +
  xlab("Payments Since Tax Holiday") +
  ylab("Paid on Time (CACE)") +
  geom_vline(aes(xintercept=0), size=.7) +
  theme_bw() +
  scale_colour_manual(values = c("black","blue")) +
  scale_alpha_manual(values = c("FALSE"=0.8, "TRUE"=1), guide='none') +
  scale_x_discrete(limit = c(-10:0,4:13),
                   labels = as.character(-10:10)) + 
  theme(plot.title = element_text(size = rel(1.75)),
        axis.text.x = element_text(size = rel(1.1), hjust=.7),
        axis.text.y = element_text(size = rel(1.25)),
        axis.title.y = element_text(size = rel(1.3)),
        axis.title.x = element_text(size = rel(1.3)),
        legend.text = element_text(size = rel(1.2)),
        strip.text.x = element_text(size = rel(1.4)),
        strip.text.y = element_text(size = rel(1.4)),
        strip.background = element_rect(size = 1.5),
        legend.position = "none",
        legend.title=element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black"))




##############################################
message("MAIN PAPER: Figure 5")

# Placebo test: Treatment Effects for Automatic vs. Manual Payers


gtp_taxes <- taxes_panel[taxes_panel$ES_BP==1, ]
gtp_taxes <- gtp_taxes[gtp_taxes$TRIBUTO!="Patente de Rodados",]
t <- unique(taxes_panel$t_st)
t <- t[order(t)]
t <- t[t>-11 & t<=28]
gtp_plot <- NULL

# Automatic Payment & Manual Payment
for (i in 1:length(t)){
  
  temp <- gtp_taxes[gtp_taxes$t_st == t[i], ]
  
  if (nrow(temp[temp$TREATMENT==1,])==0) next
  if (nrow(temp[temp$TREATMENT==0,])==0) next
  
  on_time_auto <- robust.se(ivreg(en_fecha ~ TIENE_EXO, ~ TREATMENT, 
                                  data=temp[temp$autopay_win==1,]))[2,1:2]
  on_time_manual <- robust.se(ivreg(en_fecha ~ TIENE_EXO, ~ TREATMENT, 
                                    data=temp[temp$autopay_win==0,]))[2,1:2]
  
  gtp_plot <- rbind(gtp_plot, 
                    as.vector(c("Paid on Time", t[i], on_time_auto, "Automatic Payment")),
                    as.vector(c("Paid on Time", t[i], on_time_manual, "Manual Payment"))
  )
  print(i)
}


gtp_plot <- as.data.frame(gtp_plot)
names(gtp_plot) <- c("outcome", "t", "CACE", "SE", "sample")

gtp_plot$t <- as.numeric(as.character(gtp_plot$t))
gtp_plot$CACE <- as.numeric(as.character(gtp_plot$CACE))
gtp_plot$SE <- as.numeric(as.character(gtp_plot$SE))

# Difference
diff.hte <- NULL

for (i in 1:length(t)){
  
  temp <- gtp_plot[gtp_plot$t==t[i],]
  
  if (nrow(temp)!=2) next 
  
  diff <- temp$CACE[temp$sample=="Manual Payment"] - temp$CACE[temp$sample=="Automatic Payment"]
  
  SE <- sqrt((temp$SE[temp$sample=="Manual Payment"])^2 + (temp$CACE[temp$sample=="Automatic Payment"])^2)
  
  diff.hte <- rbind.data.frame(diff.hte, 
                               as.vector(c(t[i], diff, SE)))
  
  print(i)
}


names(diff.hte) <- c("t", "CACE", "SE")
diff.hte2 <- diff.hte
diff.hte$sample <- "Difference"
diff.hte$outcome <- "Paid on Time"


gtp_plot <- rbind.data.frame(gtp_plot, diff.hte)

gtp_plot$upper <- gtp_plot$CACE + qnorm(.975) * gtp_plot$SE 
gtp_plot$lower <- gtp_plot$CACE - qnorm(.975) * gtp_plot$SE

gtp_plot$sample <- as.factor(gtp_plot$sample)
gtp_plot$sample <- relevel(gtp_plot$sample, ref= "Automatic Payment")
gtp_plot$sample <- relevel(gtp_plot$sample, ref= "Manual Payment")

# Calculate the p-value for the difference (one and two-tailed tests)
gtp_plot$t.stat <- gtp_plot$CACE/gtp_plot$SE
gtp_plot$p_value.2 <- 2 * (1 - pnorm(abs(gtp_plot$t.stat)))  # Two-tailed test
gtp_plot$p_value.1 <- (1 - pnorm(abs(gtp_plot$t.stat)))  # One-tailed test
gtp_plot$stars <- ifelse(gtp_plot$p_value.1<=.05 & 
                           gtp_plot$t>0, "+", " " ) # add one-tailed for post-treatment periods


# Figure
p <- ggplot(gtp_plot[gtp_plot$t<14,], aes(x=t, y=CACE))
p + facet_grid(sample~.) + 
  geom_rect(data=NULL,aes(xmin=0, xmax=4, ymin=-Inf, ymax=Inf),
            fill="gray80", color="gray80") +
  geom_hline(aes(yintercept=0), size=.5, linetype="dashed", color="gray60") +
  geom_text(aes(x = t, y = (upper + .05), label = stars), size = 5) + 
  geom_errorbar(aes(x=t,
                    ymin=lower, 
                    ymax=upper), 
                width=.1, size=.8, position = position_dodge(width = 0.5)) +
  geom_point(size=2.5, position = position_dodge(width = 0.5)) +
  xlab("Payments Since Tax Holiday") +
  ylab("Paid on Time (CACE)") +
  geom_vline(aes(xintercept=0), size=.7) +
  theme_bw() +
  scale_colour_manual(values = c("black","blue")) +
  scale_alpha_manual(values = c("FALSE"=0.8, "TRUE"=1), guide='none') +
  scale_x_discrete(limit = c(-10:0,4:13),
                   labels = as.character(-10:10)) + 
  theme(plot.title = element_text(size = rel(1.75)),
        axis.text.x = element_text(size = rel(1.1), hjust=.7),
        axis.text.y = element_text(size = rel(1.25)),
        axis.title.y = element_text(size = rel(1.3)),
        axis.title.x = element_text(size = rel(1.3)),
        legend.text = element_text(size = rel(1.2)),
        strip.text.x = element_text(size = rel(1.4)),
        strip.text.y = element_text(size = rel(1.4)),
        strip.background = element_rect(size = 1.5),
        legend.position = "none",
        legend.title=element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black"))

##############################################
message("MAIN PAPER: Figure 6")

# The Stock of Habit: Perfect vs. Imperfect Past Compliers


taxpayer_type <- ddply(gtp_taxes[gtp_taxes$t_st<=0 & gtp_taxes$t_st>-15,], 
                       "CUENTA", summarise,
                       type = mean(en_fecha, na.rm=T))

gtp_taxes <- merge(gtp_taxes, taxpayer_type, by="CUENTA", all.x=T) 
t <- unique(gtp_taxes$t_st)
t <- t[order(t)]
t <- t[t>-11 & t<=21]
gtp_plot <- NULL

# Imperfect Past Complier
for (i in 1:length(t)){
  
  temp <- gtp_taxes[gtp_taxes$t_st == t[i], ]
  
  if (nrow(temp[temp$TREATMENT==1 & temp$type!=1,])==0) next
  if (nrow(temp[temp$TREATMENT==0 & temp$type!=1,])==0) next
  
  on_time_marginal <- robust.se(ivreg(en_fecha ~ TIENE_EXO, ~ TREATMENT, 
                                      data=temp[temp$type!=1,]))[2,1:2]
  
  gtp_plot <- rbind(gtp_plot, 
                    as.vector(c("Paid on Time", t[i], 
                                on_time_marginal, "Imperfect Past Complier")))
  print(i)
}

# Perfect Past Complier
for (i in 1:length(t)){
  
  temp <- gtp_taxes[gtp_taxes$t_st == t[i], ]
  
  if (nrow(temp[temp$TREATMENT==1 & temp$type==1,])==0) next
  if (nrow(temp[temp$TREATMENT==0 & temp$type==1,])==0) next
  
  on_time_compliant <- robust.se(ivreg(en_fecha ~ TIENE_EXO, ~ TREATMENT, 
                                       data=temp[temp$type==1,]))[2,1:2]
  
  gtp_plot <- rbind(gtp_plot, 
                    as.vector(c("Paid on Time", t[i], 
                                on_time_compliant, "Perfect Past Complier")))
  print(i)
}

gtp_plot <- as.data.frame(gtp_plot)
names(gtp_plot) <- c("outcome", "t", "CACE", "SE", "sample")

gtp_plot$t <- as.numeric(as.character(gtp_plot$t))
gtp_plot$CACE <- as.numeric(as.character(gtp_plot$CACE))
gtp_plot$SE <- as.numeric(as.character(gtp_plot$SE))

# Difference
diff.hte <- NULL

for (i in 1:length(t)){
  
  temp <- gtp_plot[gtp_plot$t==t[i],]
  
  if (nrow(temp)!=2) next 
  
  diff <- temp$CACE[temp$sample=="Imperfect Past Complier"] - temp$CACE[temp$sample=="Perfect Past Complier"]
  
  SE <- sqrt((temp$SE[temp$sample=="Imperfect Past Complier"])^2 + (temp$CACE[temp$sample=="Perfect Past Complier"])^2)
  
  diff.hte <- rbind.data.frame(diff.hte, as.vector(c(t[i], diff, SE)))
  
  print(i)
}


names(diff.hte) <- c("t", "CACE", "SE")
diff.hte3 <- diff.hte
diff.hte$sample <- "Difference"
diff.hte$outcome <- "Paid on Time"

gtp_plot <- rbind.data.frame(gtp_plot, diff.hte)

gtp_plot$upper <- gtp_plot$CACE + qnorm(.975) * gtp_plot$SE 
gtp_plot$lower <- gtp_plot$CACE - qnorm(.975) * gtp_plot$SE

gtp_plot$sample  <- as.factor(gtp_plot$sample)
gtp_plot$sample  <- relevel(gtp_plot$sample, ref= "Perfect Past Complier")
gtp_plot$sample  <- relevel(gtp_plot$sample, ref= "Imperfect Past Complier")

gtp_plot$upper <- gtp_plot$CACE + qnorm(.975) * gtp_plot$SE 
gtp_plot$lower <- gtp_plot$CACE - qnorm(.975) * gtp_plot$SE

# Calculate the p-value for the difference (one and two-tailed tests)
gtp_plot$t.stat <- gtp_plot$CACE/gtp_plot$SE
gtp_plot$p_value.2 <- 2 * (1 - pnorm(abs(gtp_plot$t.stat)))  # Two-tailed test
gtp_plot$p_value.1 <- (1 - pnorm(abs(gtp_plot$t.stat)))  # One-tailed test
gtp_plot$stars <- ifelse(gtp_plot$p_value.1<=.05 &
                           gtp_plot$t>0, "+", " ") # add one-tailed for post-treatment periods


# Figure
p <- ggplot(gtp_plot[gtp_plot$t<14,], aes(x=t, y=CACE))
p + facet_grid(sample ~.) + 
  geom_rect(data=NULL,aes(xmin=0, xmax=4, ymin=-Inf, ymax=Inf),
            fill="gray80", color="gray80") +
  geom_hline(aes(yintercept=0), size=.5, linetype="dashed", color="gray60") +
  geom_text(aes(x = t, y = (upper + .05), label = stars), size = 5) + 
  geom_errorbar(aes(x=t,
                    ymin=lower, 
                    ymax=upper), 
                width=.1, size=.8, position = position_dodge(width = 0.5)) +
  geom_point(size=2.5, position = position_dodge(width = 0.5)) +
  xlab("Payments Since Tax Holiday") +
  ylab("Paid on Time (CACE)") +
  geom_vline(aes(xintercept=0), size=.7) +
  theme_bw() +
  scale_colour_manual(values = c("black","blue")) +
  scale_alpha_manual(values = c("FALSE"=0.8, "TRUE"=1), guide='none') +
  scale_x_discrete(limit = c(-10:0,4:13),
                   labels = as.character(-10:10)) + 
  theme(plot.title = element_text(size = rel(1.75)),
        axis.text.x = element_text(size = rel(1.1), hjust=.7),
        axis.text.y = element_text(size = rel(1.25)),
        axis.title.y = element_text(size = rel(1.3)),
        axis.title.x = element_text(size = rel(1.3)),
        legend.text = element_text(size = rel(1.2)),
        strip.text.x = element_text(size = rel(1.4)),
        strip.text.y = element_text(size = rel(1.4)),
        strip.background = element_rect(size = 1.5),
        legend.position = "none",
        legend.title=element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black"))




##############################################
message("MAIN PAPER: Figure 7")

# Field Experiment: Effects of Information About the Tax Holiday on Compliance

# Weigths for treatment - placebo comparison
table(fieldex$type, fieldex$pooled_124_0)
table.weights <- 1/prop.table(table(fieldex$type, fieldex$pooled_124_0), 1)
table.weights <- melt(table.weights)
names(table.weights) <- c("type", "pooled_124_0", "pooled_124_0_wts")
fieldex <- fieldex %>% left_join(table.weights)

# Weights for treatment versus pure control
table(fieldex$type, fieldex$pooled_124_6)
table.weights <- 1/prop.table(table(fieldex$type, fieldex$pooled_124_6), 1)
table.weights <- melt(table.weights)
names(table.weights) <- c("type", "pooled_124_6", "pooled_124_6_wts")
fieldex <- fieldex %>% left_join(table.weights)

# Weights for placebo versus pure control
table(fieldex$type, fieldex$pooled_0_6)
table.weights <- 1/prop.table(table(fieldex$type, fieldex$pooled_0_6), 1)
table.weights <- melt(table.weights)
names(table.weights) <- c("type", "pooled_0_6", "pooled_0_6_wts")
fieldex <- fieldex %>% left_join(table.weights)


# Add variable to summarize compliance between 2014-2016
fieldex <-  fieldex %>% dplyr::mutate(
  compliance_1416 = (JUL_2014_ontime + NOV_2014_ontime +
                       MAR_2015_ontime + JUL_2015_ontime + 
                       NOV_2015_ontime + MAR_2016_ontime + 
                       JUL_2016_ontime)/12,
  intended_1416 = (JUL_2014_WEBACCESS + NOV_2014_WEBACCESS +
                     MAR_2015_WEBACCESS + JUL_2015_WEBACCESS + 
                     NOV_2015_WEBACCESS + MAR_2016_WEBACCESS + 
                     JUL_2016_WEBACCESS)/12
)


#### TREATMENT VERSUS PURE CONTROL
# a) compliance
comp_control <- rbind.data.frame(
  difference_in_means(JUL_2014_ontime ~ pooled_124_6, weights = pooled_124_6_wts, 
                      data = fieldex),
  difference_in_means(NOV_2014_ontime ~ pooled_124_6, weights = pooled_124_6_wts, 
                      data = fieldex),
  difference_in_means(MAR_2015_ontime ~ pooled_124_6, weights = pooled_124_6_wts, 
                      data = fieldex),
  difference_in_means(JUL_2015_ontime ~ pooled_124_6, weights = pooled_124_6_wts, 
                      data = fieldex),
  difference_in_means(NOV_2015_ontime ~ pooled_124_6, weights = pooled_124_6_wts, 
                      data = fieldex),
  difference_in_means(MAR_2016_ontime ~ pooled_124_6, weights = pooled_124_6_wts, 
                      data = fieldex),
  difference_in_means(JUL_2016_ontime ~ pooled_124_6, weights = pooled_124_6_wts, 
                      data = fieldex),
  difference_in_means(compliance_1416 ~ pooled_124_6, weights = pooled_124_6_wts, 
                      data = fieldex)
)
comp_control$outcome <- "Paid on Time"
comp_control$control <- "Treatment vs Pure Control"

# b) Intended compliance
intcomp_control <- rbind.data.frame(
  difference_in_means(JUL_2014_WEBACCESS ~ pooled_124_6, weights = pooled_124_6_wts, 
                      data = fieldex),
  difference_in_means(NOV_2014_WEBACCESS ~ pooled_124_6, weights = pooled_124_6_wts, 
                      data = fieldex),
  difference_in_means(MAR_2015_WEBACCESS ~ pooled_124_6, weights = pooled_124_6_wts, 
                      data = fieldex),
  difference_in_means(JUL_2015_WEBACCESS ~ pooled_124_6, weights = pooled_124_6_wts, 
                      data = fieldex),
  difference_in_means(NOV_2015_WEBACCESS ~ pooled_124_6, weights = pooled_124_6_wts, 
                      data = fieldex),
  difference_in_means(MAR_2016_WEBACCESS ~ pooled_124_6, weights = pooled_124_6_wts, 
                      data = fieldex),
  difference_in_means(JUL_2016_WEBACCESS ~ pooled_124_6, weights = pooled_124_6_wts, 
                      data = fieldex),
  difference_in_means(intended_1416 ~ pooled_124_6, weights = pooled_124_6_wts, 
                      data = fieldex)
)
intcomp_control$outcome <- "Web Access"
intcomp_control$control <- "Treatment vs Pure Control"


#### TREATMENT VERSUS PLACEBO 
# a) Compliance 
comp_placebo <- rbind.data.frame(
  difference_in_means(JUL_2014_ontime ~ pooled_124_0, weights = pooled_124_0_wts, 
                      data = fieldex),
  difference_in_means(NOV_2014_ontime ~ pooled_124_0, weights = pooled_124_0_wts, 
                      data = fieldex),
  difference_in_means(MAR_2015_ontime ~ pooled_124_0, weights = pooled_124_0_wts, 
                      data = fieldex),
  difference_in_means(JUL_2015_ontime ~ pooled_124_0, weights = pooled_124_0_wts, 
                      data = fieldex),
  difference_in_means(NOV_2015_ontime ~ pooled_124_0, weights = pooled_124_0_wts, 
                      data = fieldex),
  difference_in_means(MAR_2016_ontime ~ pooled_124_0, weights = pooled_124_0_wts, 
                      data = fieldex),
  difference_in_means(JUL_2016_ontime ~ pooled_124_0, weights = pooled_124_0_wts, 
                      data = fieldex),
  difference_in_means(compliance_1416 ~ pooled_124_0, weights = pooled_124_0_wts, 
                      data = fieldex)
)
comp_placebo$outcome <- "Paid on Time"
comp_placebo$control <- "Treatment vs Placebo"

# b) Intended compliance
intcomp_placebo <- rbind.data.frame(
  difference_in_means(JUL_2014_WEBACCESS ~ pooled_124_0, 
                      weights = pooled_124_0_wts, data = fieldex),
  difference_in_means(NOV_2014_WEBACCESS ~ pooled_124_0, 
                      weights = pooled_124_0_wts, data = fieldex),
  difference_in_means(MAR_2015_WEBACCESS ~ pooled_124_0, 
                      weights = pooled_124_0_wts, data = fieldex),
  difference_in_means(JUL_2015_WEBACCESS ~ pooled_124_0, 
                      weights = pooled_124_0_wts, data = fieldex),
  difference_in_means(NOV_2015_WEBACCESS ~ pooled_124_0, 
                      weights = pooled_124_0_wts, data = fieldex),
  difference_in_means(MAR_2016_WEBACCESS ~ pooled_124_0, 
                      weights = pooled_124_0_wts, data = fieldex),
  difference_in_means(JUL_2016_WEBACCESS ~ pooled_124_0, 
                      weights = pooled_124_0_wts, data = fieldex),
  difference_in_means(intended_1416 ~ pooled_124_0, 
                      weights = pooled_124_0_wts, data = fieldex)
)
intcomp_placebo$outcome <- "Web Access"
intcomp_placebo$control <- "Treatment vs Placebo"


#### PURE CONTROL VERSUS PLACEBO 
# a) Compliance 
comp_control_pla <- rbind.data.frame(
  difference_in_means(JUL_2014_ontime ~ pooled_0_6, weights = pooled_0_6_wts, 
                      data = fieldex),
  difference_in_means(NOV_2014_ontime ~ pooled_0_6, weights = pooled_0_6_wts, 
                      data = fieldex),
  difference_in_means(MAR_2015_ontime ~ pooled_0_6, weights = pooled_0_6_wts, 
                      data = fieldex),
  difference_in_means(JUL_2015_ontime ~ pooled_0_6, weights = pooled_0_6_wts, 
                      data = fieldex),
  difference_in_means(NOV_2015_ontime ~ pooled_0_6, weights = pooled_0_6_wts, 
                      data = fieldex),
  difference_in_means(MAR_2016_ontime ~ pooled_0_6, weights = pooled_0_6_wts, 
                      data = fieldex),
  difference_in_means(JUL_2016_ontime ~ pooled_0_6, weights = pooled_0_6_wts, 
                      data = fieldex),
  difference_in_means(compliance_1416 ~ pooled_0_6, weights = pooled_0_6_wts, 
                      data = fieldex)
)
comp_control_pla$outcome <- "Paid on Time"
comp_control_pla$control <- "Placebo vs Pure Control"

# b) Intended compliance
intcomp_control_pla <- rbind.data.frame(
  difference_in_means(JUL_2014_WEBACCESS ~ pooled_0_6, 
                      weights = pooled_0_6_wts, data = fieldex),
  difference_in_means(NOV_2014_WEBACCESS ~ pooled_0_6, 
                      weights = pooled_0_6_wts, data = fieldex),
  difference_in_means(MAR_2015_WEBACCESS ~ pooled_0_6, 
                      weights = pooled_0_6_wts, data = fieldex),
  difference_in_means(JUL_2015_WEBACCESS ~ pooled_0_6, 
                      weights = pooled_0_6_wts, data = fieldex),
  difference_in_means(NOV_2015_WEBACCESS ~ pooled_0_6, 
                      weights = pooled_0_6_wts, data = fieldex),
  difference_in_means(MAR_2016_WEBACCESS ~ pooled_0_6, 
                      weights = pooled_0_6_wts, data = fieldex),
  difference_in_means(JUL_2016_WEBACCESS ~ pooled_0_6, 
                      weights = pooled_0_6_wts, data = fieldex),
  difference_in_means(intended_1416 ~ pooled_0_6, 
                      weights = pooled_0_6_wts, data = fieldex)
)
intcomp_control_pla$outcome <- "Web Access"
intcomp_control_pla$control <- "Placebo vs Pure Control"

## Combine and plot
plotdata <- rbind.data.frame(comp_placebo, comp_control, comp_control_pla, 
                             intcomp_placebo, intcomp_control, 
                             intcomp_control_pla)
rm(comp_placebo, comp_control,
   intcomp_placebo, intcomp_control,
   comp_control_pla, intcomp_control_pla)

plotdata$time <- rep(1:8, 6)

plotdata$control <- as.factor(plotdata$control)

pd <- position_dodge(width = 0.6)
ggplot(plotdata, aes(x=time, y=coefficients, group = control, shape = control)) + 
  facet_wrap( ~ outcome) + 
  geom_point(size=4.5, position=pd) +
  geom_hline(aes(yintercept=0), size=.5, linetype="dashed") + 
  geom_errorbar(aes(x=time,
                    ymin=conf.low, 
                    ymax=conf.high), 
                width=.15, size=1, position=pd) +
  xlab("Payments Since Receiving Flyer") + ylab("Average Causal Effect") + 
  theme_minimal() +
  scale_colour_manual(values = c("black", "black")) +
  scale_x_continuous(breaks=1:8,
                     labels=c(as.character(1:7), "pooled")) + 
  theme(legend.position = "bottom",
        legend.title=element_blank()) 


##############################################
message("MAIN PAPER: Figure 8")

#  Survey Experiment: Effects of Information about the Lottery on Attitudes Towards Taxation

# Select all variables and take the mean, N & SE for 
discretion1 <- ddply(survey_data, "treat_discretion",
                     summarise,
                     mean=mean(S1p4,na.rm=T),
                     N=length(na.omit(S1p4,na.rm=T)),
                     se=sd(S1p4,na.rm=T)/sqrt(N)) 

discretion2 <- ddply(survey_data, "treat_discretion",
                     summarise,
                     mean=mean(S1p1,na.rm=T),
                     N=length(na.omit(S1p1,na.rm=T)),
                     se=sd(S1p1,na.rm=T)/sqrt(N))

discretion3 <-  ddply(survey_data, "treat_discretion",
                      summarise,
                      mean=mean(S1p3,na.rm=T),
                      N=length(na.omit(S1p3,na.rm=T)),
                      se=sd(S1p3,na.rm=T)/sqrt(N))

discretion4 <- ddply(survey_data, "treat_discretion",
                     summarise,
                     mean=mean(S1p2,na.rm=T),
                     N=length(na.omit(S1p2,na.rm=T)),
                     se=sd(S1p2, na.rm=T)/sqrt(N)) 

discretion5 <- ddply(survey_data, "treat_discretion",
                     summarise,
                     mean=mean(S1p5,na.rm=T),
                     N=length(na.omit(S1p5,na.rm=T)),
                     se=sd(S1p5, na.rm=T)/sqrt(N)) 

# all together
discretion <- as.data.frame(rbind(discretion1, discretion2, discretion3,
                                  discretion4, discretion5))
rm(discretion1, discretion2, discretion3,discretion4, discretion5)

discretion$outcome <- rep(c("Rewards Go To The\n Same People As Always",
                            "Rewards Are A\n Waste Of Money",
                            "Worth It To Be\n Up To Date",
                            "Municipal Government\n Does A Good Job",
                            "Municipal Taxes\n Are Just"), each=2)


# Calculate the difference in means for all variables between the Lottery and Discretionary group
discretion_lot<-discretion[discretion$treat_discretion==0,] #Lottery
discretion_dis<-discretion[discretion$treat_discretion==1,] #Discretionary

diff<-data.frame(matrix(ncol=6,nrow=5))
colnames(diff)<-c('outcome','Estimate','Std. Error','p value','upper','lower')

diff$outcome <- c("Rewards Go To The\n Same People As Always",
                  "Rewards Are A\n Waste Of Money",
                  "Worth It To Be\n Up To Date",
                  "Municipal Government\n Does A Good Job",
                  "Municipal Taxes\n Are Just")
for (i in 1:5){
  
  aux_1<-discretion_lot[i,] #Lottery
  aux_0<-discretion_dis[i,] #Discretionary
  
  # Estimate
  mean<-aux_1$mean-aux_0$mean
  # Calculate the standard error of the difference between the means
  se_diff <- sqrt((aux_1$se^2) + (aux_0$se^2))
  # Calculate the t-statistic
  t_stat <- (aux_1$mean - aux_0$mean) / se_diff
  # Calculate the degrees of freedom
  df <- aux_1$N+ aux_0$N - 2
  # Calculate the p-value (two-tailed test)
  p_value <- 2 * pt(-abs(t_stat), df)
  p_value
  # calculate upper and lower CI
  upper <- mean + 1.96*se_diff
  lower <- mean - 1.96*se_diff
  
  diff[i,'Estimate']<-mean
  diff[i,'Std. Error']<-se_diff
  diff[i,'p value']<-p_value
  diff[i,'upper']<-upper
  diff[i,'lower']<-lower
}

# Figure
p <- ggplot(diff, aes(x=outcome, y=Estimate))
p + geom_point(size=4.5) +
  xlab("Outcome variables") +
  ylab("Estimated Effect \n (Lottery minus Discretionary Treatment)") +
  geom_errorbar(aes(x=outcome,
                    ymin=lower, 
                    ymax=upper), 
                width=.1, size=.8) +
  coord_cartesian(ylim = c(-3, 3))+
  geom_hline(yintercept = 0, linetype = "dashed", color = 'black')+
  theme_bw() + 
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 90, hjust = 0.5))  



################################################################################
# APPENDIX Tables & Figures
################################################################################

##############################################
message("APPENDIX: Figure A1")

# Reward Programs for Good Taxpayers in Latin America

# prep data ---
latam_map_data$per4<-as.numeric(gsub('%','',latam_map_data$per3))
latam_map_data$per4<-ifelse(is.na(latam_map_data$per4),0,latam_map_data$per4)
latam_map_data$group<-NA
latam_map_data$group[latam_map_data$per4==0]<-1
latam_map_data$group[latam_map_data$per4>=0.15 & latam_map_data$per4<=2]<-2
latam_map_data$group[latam_map_data$per4>=2.1 & latam_map_data$per4<=9.5]<-3
latam_map_data$group[latam_map_data$per4>=9.6 & latam_map_data$per4<=37]<-4
latam_map_data$group[latam_map_data$per4>=37.1 & latam_map_data$per4<=79]<-5

latam_map_data$label<-paste0(latam_map_data$NAME,'\n',latam_map_data$per3)
latam_map_data$label<-ifelse(is.na(latam_map_data$per3),NA,latam_map_data$label)


# Figure

labels<-c('0% - 0.14%','0.15% - 2%','2.1%- 9.5%','9.6% - 37%','37.1%-79%')

map<-ggplot() +
  geom_sf(data = latam_map_data,aes(fill=factor(group),  geometry = geometry))+
  scale_fill_manual(values = c("white", "grey80", "grey60", "grey20", "grey5"), name = "group", labels = labels) +
  geom_sf_label(data = latam_map_data, aes(label = label), 
                fill='white',size = 2.5,nudge_x = -4,fontface = "bold") + #label.size = 0,
  theme_classic()+
  theme(plot.background = element_rect(fill = "white"),
        panel.background = element_rect(colour = "white",),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        axis.line = element_blank(),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        axis.title = element_blank(),
        legend.position = 'right',
        legend.title = element_blank())+
  coord_sf()

map


##############################################
message("APPENDIX: Table A1")

#Balance test

gtp_panel <- buen_pagador_panel
gtp_panel <- gtp_panel[abs(gtp_panel$centered)<58,]

taxes_panel$t[taxes_panel$TRIBUTO=="TS" | taxes_panel$TRIBUTO=="TD"] <- taxes_panel$t/2


balance <- rbind(
  with(gtp_panel[gtp_panel$centered==0,], ttest(BP_time, TREATMENT)),
  with(gtp_panel[gtp_panel$centered==-1,], ttest(BP_time, TREATMENT)),
  with(gtp_panel[gtp_panel$centered==-2,], ttest(BP_time, TREATMENT)),
  with(gtp_panel[gtp_panel$centered==-3,], ttest(BP_time, TREATMENT)),
  with(gtp_panel[gtp_panel$centered==-4,], ttest(BP_time, TREATMENT)),
  with(gtp_panel[gtp_panel$centered==-5,], ttest(BP_time, TREATMENT)),
  with(gtp_panel[gtp_panel$centered==0,], ttest(autopay_win, TREATMENT)),
  with(gtp_panel[gtp_panel$centered==-1,], ttest(autopay_win, TREATMENT)),
  with(gtp_panel[gtp_panel$centered==-2,], ttest(autopay_win, TREATMENT)),
  with(gtp_panel[gtp_panel$centered==-3,], ttest(autopay_win, TREATMENT)),
  ttest(naturalex$VALOR_CAT2004, naturalex$TREATMENT),
  ttest(naturalex$VALOR_CATASTRALACTUAL, naturalex$TREATMENT),
  ttest(naturalex$rental, naturalex$TREATMENT),
  ttest(naturalex$JUBILADO, naturalex$TREATMENT),
  ttest(naturalex$TODOPAGO, naturalex$TREATMENT))

balance <- round(balance, 3)
balance <- as.data.frame(balance)
rownames(balance) <-  c("Good taxpayer at t=0",
                        "Good taxpayer at t=-1",
                        "Good taxpayer at t=-2",
                        "Good taxpayer at t=-3",
                        "Good taxpayer at t=-4",
                        "Good taxpayer at t=-5",
                        "Automatic debit at t=0",
                        "Automatic debit at t=-1",
                        "Automatic debit at t=-2",
                        "Automatic debit at t=-3",
                        "2004 Property value", 
                        "Current property value", 
                        "Rented Property", 
                        "Retiree", 
                        "Paid year in full")

balance$sample <- c(rep("All Taxes", 6),
                    rep("Property, head & sewage", 7),
                    rep("Property", 2))

balance[,c(2:4,6,8,9)]


##############################################
message('APPENDIX: Figure A3')

# Natural Experiment: Property Plots of Winning Account Numbers.


# Winning Account number
#map
map<-ggplot() +
  geom_sf(data = shp2, aes(geometry = geometry))+
  geom_sf(data = naturalex_data_map[naturalex_data_map$TREATMENT==1,], aes(geometry = geometry),color="magenta",size=1)+ #0.4
  #facet_wrap(~TRIBUTO)+
  theme_classic()+
  theme(plot.background = element_rect(fill = "white"),
        panel.background = element_rect(colour = "white",),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        axis.line = element_blank(),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        axis.title = element_blank(),
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        legend.background = element_blank())+
  coord_sf()


map

# Non-Winning Account number
map<-ggplot() +
  geom_sf(data = shp2, aes(geometry = geometry))+
  geom_sf(data = naturalex_data_map[naturalex_data_map$TREATMENT==0,], aes(geometry = geometry),color="green",size=1)+ #0.4
  #facet_wrap(~TRIBUTO)+
  theme_classic()+
  theme(plot.background = element_rect(fill = "white"),
        panel.background = element_rect(colour = "white",),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        axis.line = element_blank(),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        axis.title = element_blank(),
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        legend.background = element_blank())+
  coord_sf()


map

##############################################
message("APPENDIX: Table A2")

#Natural Experiment: Placebo Tests with Post-Treatment Variables - Ineligible Taxpayers.
#Winning vs. Non-Winning Account Numbers.

btp <- gtp_panel %>% filter(ES_BP==0)

placebo <- rbind.data.frame(
  with(btp %>% filter(centered==4), ttest(BP_time, TREATMENT)),
  with(btp %>% filter(centered==5), ttest(BP_time, TREATMENT)),
  with(btp %>% filter(centered==6), ttest(BP_time, TREATMENT)),
  with(btp %>% filter(centered==7), ttest(BP_time, TREATMENT)),
  with(btp %>% filter(centered==8), ttest(BP_time, TREATMENT)),
  with(btp %>% filter(centered %in% 4:8) %>% group_by(CUENTA, TREATMENT) %>% 
         dplyr::summarize(BP_time = mean(BP_time)), 
       ttest(BP_time, TREATMENT))
)

names(placebo) <- c("Mean 1", "Mean 0", "Difference", "SE_Diff",
                    "t-stat","N","df","p-value")

placebo$outcome <- c("Good taxpayer t=1",
                     "Good taxpayer t=2",
                     "Good taxpayer t=3",
                     "Good taxpayer t=4",
                     "Good taxpayer t=5",
                     "Good taxpayer t=1-5")

placebo

##############################################
message("APPENDIX: Figure A4(a)")

#Natural Experiment: The Negative Impact of Holidays on Compliance.
#Effects on the Number of Payments Owed

t <- unique(taxes_panel$t_st)
t <- t[order(t)]
t <- t[t %in% -10:20]
gtp_plot <- NULL

#CACE estimation for all periods
for (i in 1:length(t)){
  
  temp <- taxes_panel[taxes_panel$t_st == t[i], ]
  if (nrow(temp[temp$TREATMENT==1 & temp$ES_BP==0,])==0) next
  if (nrow(temp[temp$TREATMENT==0 & temp$ES_BP==0,])==0) next
  
  on_time <- t.test(nr_paymntsowed ~ TREATMENT, 
                    data=temp[temp$ES_BP==0,])
  on_time <- c(on_time$estimate[2]-on_time$estimate[1], 
               -on_time$conf.int[1:2])
  
  gtp_plot <- rbind(gtp_plot, 
                    as.vector(c("Paid on Time", t[i], 0, on_time)))
  
  if (nrow(temp[temp$TREATMENT==1 & temp$ES_BP==1,])==0) next
  if (nrow(temp[temp$TREATMENT==0 & temp$ES_BP==1,])==0) next
  
  ivest <- tidy(iv_robust(nr_paymntsowed ~ TIENE_EXO | TREATMENT, 
                          data=temp[temp$ES_BP==1,]))
  on_time <- c(ivest$estimate[2], ivest$std.error[2])
  
  
  
  on_time <- c(on_time[1], on_time[1]-1.96*on_time[2], on_time[1]+1.96*on_time[2])
  
  gtp_plot <- rbind(gtp_plot, 
                    as.vector(c("Paid on Time", t[i], 1, on_time)))
  
}

gtp_plot <- as.data.frame(gtp_plot)
names(gtp_plot) <- c("outcome", "t", "ES_BP", "CACE", "upper", "lower")

gtp_plot$t <- as.numeric(as.character(gtp_plot$t))
gtp_plot$CACE <- as.numeric(as.character(gtp_plot$CACE))
gtp_plot$upper <- as.numeric(as.character(gtp_plot$upper))
gtp_plot$lower <- as.numeric(as.character(gtp_plot$lower))

#Figure
ggplot(gtp_plot[gtp_plot$ES_BP==1,], aes(x=t, y=CACE)) +
  geom_rect(data=NULL,aes(xmin=0, xmax=4, ymin=-Inf, ymax=Inf),
            fill="gray80", color="gray80") +
  geom_errorbar(aes(x=t,
                    ymin=lower, 
                    ymax=upper), 
                width=.6, size=.8, position = position_dodge(width = 0.6)) +
  geom_point(size=4, position = position_dodge(width = 0.6)) +
  xlab("Payments Since Tax Holiday") +
  ylab("Number of Payments Owed (CACE)") +
  geom_vline(aes(xintercept=0), size=.7) +
  geom_hline(aes(yintercept=0), size=.5, linetype="dashed") +
  theme_bw() +
  scale_x_discrete(limit = c(-10:0,4:20),
                   labels = as.character(-10:17)) + 
  scale_colour_manual(values = c("black","blue")) +
  theme(plot.title = element_text(size = rel(1.75)),
        axis.text.y = element_text(size = rel(1.25)),
        axis.title.y = element_text(size = rel(1.3)),
        axis.title.x = element_text(size = rel(1.3)),
        legend.text = element_text(size = rel(1.2)),
        strip.text.x = element_text(size = rel(1.4)),
        strip.text.y = element_text(size = rel(1.4)),
        strip.background = element_rect(size = 1.5),
        axis.text.x = element_text(size = rel(1.1), hjust=.7),
        legend.position = "bottom",
        legend.title=element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black"))

##############################################
message("APPENDIX: Figure A4(b)")

#Natural Experiment: The Negative Impact of Holidays on Compliance.
#Effects on the Compliance

gtp_plot <- NULL

#CACE estimation for all periods
for (i in 1:length(t)){
  
  temp <- taxes_panel[taxes_panel$t_st == t[i], ]
  if (nrow(temp[temp$TREATMENT==1 & temp$ES_BP==0,])==0) next
  if (nrow(temp[temp$TREATMENT==0 & temp$ES_BP==0,])==0) next
  
  on_time <- t.test(compliance ~ TREATMENT, 
                    data=temp[temp$ES_BP==0,])
  on_time <- c(on_time$estimate[2]-on_time$estimate[1], 
               -on_time$conf.int[1:2])
  
  gtp_plot <- rbind(gtp_plot, 
                    as.vector(c("Paid on Time", t[i], 0, on_time)))
  
  if (nrow(temp[temp$TREATMENT==1 & temp$ES_BP==1,])==0) next
  if (nrow(temp[temp$TREATMENT==0 & temp$ES_BP==1,])==0) next
  
  ivest <- tidy(iv_robust(compliance ~ TIENE_EXO | TREATMENT, 
                          data=temp[temp$ES_BP==1,]))
  on_time <- c(ivest$estimate[2], ivest$std.error[2])
  
  on_time <- c(on_time[1], on_time[1]-1.96*on_time[2], on_time[1]+1.96*on_time[2])
  
  gtp_plot <- rbind(gtp_plot, 
                    as.vector(c("Paid on Time", t[i], 1, on_time)))
  
}

gtp_plot <- as.data.frame(gtp_plot)
names(gtp_plot) <- c("outcome", "t", "ES_BP", "CACE", "upper", "lower")

gtp_plot$t <- as.numeric(as.character(gtp_plot$t))
gtp_plot$CACE <- as.numeric(as.character(gtp_plot$CACE))
gtp_plot$upper <- as.numeric(as.character(gtp_plot$upper))
gtp_plot$lower <- as.numeric(as.character(gtp_plot$lower))

#Figure
ggplot(gtp_plot[gtp_plot$ES_BP==1,], aes(x=t, y=CACE)) + 
  geom_rect(data=NULL,aes(xmin=0, xmax=4, ymin=-Inf, ymax=Inf),
            fill="gray80", color="gray80") +
  geom_errorbar(aes(x=t,
                    ymin=lower, 
                    ymax=upper), 
                width=.6, size=.8, position = position_dodge(width = 0.6)) +
  geom_point(size=4, position = position_dodge(width = 0.6)) +
  xlab("Payments Since Tax Holiday") +
  ylab("Compliance (CACE)") +
  geom_vline(aes(xintercept=0), size=.7) +
  geom_hline(aes(yintercept=0), size=.5, linetype="dashed") +
  theme_bw() +
  scale_x_discrete(limit = c(-10:0,4:20),
                   labels = as.character(-10:17)) + 
  scale_colour_manual(values = c("black","blue")) +
  theme(plot.title = element_text(size = rel(1.75)),
        axis.text.y = element_text(size = rel(1.25)),
        axis.title.y = element_text(size = rel(1.3)),
        axis.title.x = element_text(size = rel(1.3)),
        legend.text = element_text(size = rel(1.2)),
        strip.text.x = element_text(size = rel(1.4)),
        strip.text.y = element_text(size = rel(1.4)),
        strip.background = element_rect(size = 1.5),
        axis.text.x = element_text(size = rel(1.1), hjust=.7),
        legend.position = "bottom",
        legend.title=element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black"))

##############################################
message("APPENDIX: Figure A4(c)")

#Natural Experiment: The Negative Impact of Holidays on Compliance.
#Effects on the Total Debt as of October 2014

difference_in_means(debt_amount ~ won_lottery, 
                    data = naturalex_debt_gtp)
#N
nrow(naturalex_debt_gtp)

##############################################
message("APPENDIX: Figure A5")

# Main Results - Full Post Treatment Period
# The Negative Impact of Holidays on Compliance

table(taxes_panel$TREATMENT, taxes_panel$TIENE_EXO)

t <- -10:21
btp <- NULL
gtp <- NULL

for (i in t){
  
  temp <- taxes_panel[taxes_panel$t_st == i, ]
  
  # For Ineligible Taxpayers
  btp_est <- tidy(lm_robust(en_fecha ~ TREATMENT, 
                            data = filter(temp, ES_BP==0)))[2,]
  btp <- rbind.data.frame(btp, cbind.data.frame(i, "Ineligible Taxpayers", btp_est))
  
  # For Eligible Taxpayers
  # For good taxpayers, skip periods under the tax holiday
  if (nrow(temp[temp$TREATMENT==1 & temp$ES_BP==1,])==0) next
  
  gtp_est <- tidy(iv_robust(en_fecha ~ TIENE_EXO | TREATMENT, 
                            data = filter(temp, ES_BP==1)))[2,]
  gtp <- rbind.data.frame(gtp, cbind.data.frame(i, "Eligible Taxpayers", gtp_est))
  
}

names(gtp)[2] <- names(btp)[2] <- "type"
plot <- rbind.data.frame(gtp, btp); rm(gtp, btp)


#Figure
ggplot(plot, aes(x=i, y=estimate, shape=type)) + 
  facet_grid(type~.) + 
  geom_rect(data=NULL,aes(xmin=0, xmax=4, ymin=-Inf, ymax=Inf),
            fill="gray80", color="gray80") +
  geom_errorbar(aes(x=i,
                    ymin=conf.low, 
                    ymax=conf.high), 
                width=.6, size=.8, position = position_dodge(width = 0.6)) +
  geom_point(size=4, position = position_dodge(width = 0.6)) +
  xlab("Payments Since Tax Holiday") +
  ylab("Paid on Time (CACE)") +
  geom_vline(aes(xintercept=0), size=.7) +
  geom_hline(aes(yintercept=0), size=.5, linetype="dashed") +
  theme_bw() +
  scale_x_discrete(limit = t[!t %in% c(1,2,3)], 
                   labels = as.character(c(t[t<1],
                                           t[!t%in%c(1,2,3) & t>0]-3))) +
  theme(plot.title = element_text(size = rel(1.75)),
        axis.text.y = element_text(size = rel(1.25)),
        axis.title.y = element_text(size = rel(1.3)),
        axis.title.x = element_text(size = rel(1.3)),
        legend.text = element_text(size = rel(1.2)),
        strip.text.x = element_text(size = rel(1.4)),
        strip.text.y = element_text(size = rel(1.4)),
        strip.background = element_rect(size = 1.5),
        axis.text.x = element_text(size = rel(1.1), hjust=.7),
        legend.position = "bottom",
        legend.title=element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black"))



##############################################
message("APPENDIX: Figure A6")

# Treatment Effects By Type of Tax: Holiday vs. No Holiday 
# Full Post Treatment Period

gtp_taxes <- taxes_panel[taxes_panel$ES_BP==1, ]
t <- unique(gtp_taxes$t_st)
t <- t[order(t)]
t <- t[t>-11 & t<=21]
gtp_plot <- NULL

# No Holiday
for (i in 1:length(t)){
  
  temp <- gtp_taxes[gtp_taxes$t_st == t[i], ]
  
  if (nrow(temp[temp$TREATMENT==1 & temp$holiday_type==0,])==0) next
  if (nrow(temp[temp$TREATMENT==0 & temp$holiday_type==0,])==0) next
  
  on_time_noholiday <- robust.se(ivreg(en_fecha ~ TIENE_EXO, ~ TREATMENT, 
                                       data=temp[temp$holiday_type==0,]))[2,1:2]
  
  
  gtp_plot <- rbind(gtp_plot, 
                    as.vector(c("Paid on Time", t[i], on_time_noholiday, "No Holiday")))
  
  print(i)
}

#Holiday
for (i in 1:length(t)){
  
  temp <- gtp_taxes[gtp_taxes$t_st == t[i], ]
  
  if (nrow(temp[temp$TREATMENT==1 & temp$holiday_type==1,])==0) next
  if (nrow(temp[temp$TREATMENT==0 & temp$holiday_type==1,])==0) next
  
  on_time_holiday <- robust.se(ivreg(en_fecha ~ TIENE_EXO, ~ TREATMENT, 
                                     data=temp[temp$holiday_type==1,]))[2,1:2]
  
  gtp_plot <- rbind(gtp_plot, 
                    as.vector(c("Paid on Time", t[i], on_time_holiday, "Holiday")))
  
  print(i)
}

gtp_plot <- as.data.frame(gtp_plot)
names(gtp_plot) <- c("outcome", "t", "CACE", "SE", "lottery_type")

gtp_plot$t <- as.numeric(as.character(gtp_plot$t))
gtp_plot$CACE <- as.numeric(as.character(gtp_plot$CACE))
gtp_plot$SE <- as.numeric(as.character(gtp_plot$SE))

# Difference
diff.hte <- NULL

for (i in 1:length(t)){
  
  temp <- gtp_plot[gtp_plot$t==t[i],]
  
  if (nrow(temp)!=2) next
  
  diff <- temp$CACE[temp$lottery_type=="Holiday"] - temp$CACE[temp$lottery_type=="No Holiday"]
  
  SE <- sqrt((temp$SE[temp$lottery_type=="Holiday"])^2 + (temp$CACE[temp$lottery_type=="No Holiday"])^2)
  
  diff.hte <- rbind.data.frame(diff.hte, as.vector(c(t[i], diff, SE)))
  
  print(i)
}

names(diff.hte) <- c("t", "CACE", "SE")
diff.hte1 <- diff.hte
diff.hte$lottery_type <- "Difference"
diff.hte$outcome <- "Paid on Time"


gtp_plot <- rbind.data.frame(gtp_plot, diff.hte)

gtp_plot$upper <- gtp_plot$CACE + qnorm(.975) * gtp_plot$SE 
gtp_plot$lower <- gtp_plot$CACE - qnorm(.975) * gtp_plot$SE

gtp_plot$lottery_type <- as.factor(gtp_plot$lottery_type)
gtp_plot$lottery_type <- relevel(gtp_plot$lottery_type, ref= "No Holiday")
gtp_plot$lottery_type <- relevel(gtp_plot$lottery_type, ref= "Holiday")

# Calculate the p-value for the difference (one and two-tailed tests)
gtp_plot$t.stat <- gtp_plot$CACE/gtp_plot$SE
gtp_plot$p_value.2 <- 2 * (1 - pnorm(abs(gtp_plot$t.stat)))  # Two-tailed test
gtp_plot$p_value.1 <- (1 - pnorm(abs(gtp_plot$t.stat)))  # One-tailed test
gtp_plot$stars <- ifelse(gtp_plot$p_value.1<=.05 & 
                           gtp_plot$t>0, "+", " ") # add one-tailed for post-treatment periods


# Figure
p <- ggplot(gtp_plot[gtp_plot$t<14+8,], aes(x=t, y=CACE))
p + facet_grid(lottery_type~.) + 
  geom_rect(data=NULL,aes(xmin=0, xmax=4, ymin=-Inf, ymax=Inf),
            fill="gray80", color="gray80") + 
  geom_hline(aes(yintercept=0), size=.5, linetype="dashed", color="gray60") +
  #geom_text(aes(x = t, y = (upper + .02), label = stars), size = 5) + 
  geom_errorbar(aes(x=t,
                    ymin=lower, 
                    ymax=upper), 
                width=.1, size=.8, position = position_dodge(width = 0.5)) +
  geom_point(size=2.5, position = position_dodge(width = 0.5)) +
  xlab("Payments Since Tax Holiday") +
  ylab("Paid on Time (CACE)") +
  geom_vline(aes(xintercept=0), size=.7) +
  theme_bw() +
  scale_colour_manual(values = c("black","blue")) +
  scale_alpha_manual(values = c("FALSE"=0.8, "TRUE"=1), guide='none') +
  scale_x_discrete(limit = c(-10:0,4:21),
                   labels = as.character(-10:18)) + 
  theme(plot.title = element_text(size = rel(1.75)),
        axis.text.x = element_text(size = rel(1.1), hjust=.7),
        axis.text.y = element_text(size = rel(1.25)),
        axis.title.y = element_text(size = rel(1.3)),
        axis.title.x = element_text(size = rel(1.3)),
        legend.text = element_text(size = rel(1.2)),
        strip.text.x = element_text(size = rel(1.4)),
        strip.text.y = element_text(size = rel(1.4)),
        strip.background = element_rect(size = 1.5),
        legend.position = "none",
        legend.title=element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black"))




##############################################
message("APPENDIX: Figure A7")

# Placebo test: Treatment Effects for Automatic vs. Manual Payers
# Full Post Treatment Period

gtp_taxes <- taxes_panel[taxes_panel$ES_BP==1, ]
gtp_taxes <- gtp_taxes[gtp_taxes$TRIBUTO!="Patente de Rodados",]
t <- unique(taxes_panel$t_st)
t <- t[order(t)]
t <- t[t>-11 & t<=28]
gtp_plot <- NULL


# Automatic Payment & Manual Payment
for (i in 1:length(t)){
  
  temp <- gtp_taxes[gtp_taxes$t_st == t[i], ]
  
  if (nrow(temp[temp$TREATMENT==1,])==0) next
  if (nrow(temp[temp$TREATMENT==0,])==0) next
  
  on_time_auto <- robust.se(ivreg(en_fecha ~ TIENE_EXO, ~ TREATMENT, 
                                  data=temp[temp$autopay_win==1,]))[2,1:2]
  on_time_manual <- robust.se(ivreg(en_fecha ~ TIENE_EXO, ~ TREATMENT, 
                                    data=temp[temp$autopay_win==0,]))[2,1:2]
  
  gtp_plot <- rbind(gtp_plot, 
                    as.vector(c("Paid on Time", t[i], on_time_auto, "Automatic Payment")),
                    as.vector(c("Paid on Time", t[i], on_time_manual, "Manual Payment")))
  print(i)
}


gtp_plot <- as.data.frame(gtp_plot)
names(gtp_plot) <- c("outcome", "t", "CACE", "SE", "sample")

gtp_plot$t <- as.numeric(as.character(gtp_plot$t))
gtp_plot$CACE <- as.numeric(as.character(gtp_plot$CACE))
gtp_plot$SE <- as.numeric(as.character(gtp_plot$SE))

# Difference
diff.hte <- NULL

for (i in 1:length(t)){
  
  temp <- gtp_plot[gtp_plot$t==t[i],]
  
  if (nrow(temp)!=2) next 
  
  diff <- temp$CACE[temp$sample=="Manual Payment"] - temp$CACE[temp$sample=="Automatic Payment"]
  
  SE <- sqrt((temp$SE[temp$sample=="Manual Payment"])^2 + (temp$CACE[temp$sample=="Automatic Payment"])^2)
  
  diff.hte <- rbind.data.frame(diff.hte, as.vector(c(t[i], diff, SE)))
  
  print(i)
}


names(diff.hte) <- c("t", "CACE", "SE")
diff.hte2 <- diff.hte
diff.hte$sample <- "Difference"
diff.hte$outcome <- "Paid on Time"


gtp_plot <- rbind.data.frame(gtp_plot, diff.hte)

gtp_plot$upper <- gtp_plot$CACE + qnorm(.975) * gtp_plot$SE 
gtp_plot$lower <- gtp_plot$CACE - qnorm(.975) * gtp_plot$SE

gtp_plot$sample <- as.factor(gtp_plot$sample)
gtp_plot$sample <- relevel(gtp_plot$sample, ref= "Automatic Payment")
gtp_plot$sample <- relevel(gtp_plot$sample, ref= "Manual Payment")

# Calculate the p-value for the difference (one and two-tailed tests)
gtp_plot$t.stat <- gtp_plot$CACE/gtp_plot$SE
gtp_plot$p_value.2 <- 2 * (1 - pnorm(abs(gtp_plot$t.stat)))  # Two-tailed test
gtp_plot$p_value.1 <- (1 - pnorm(abs(gtp_plot$t.stat)))  # One-tailed test
gtp_plot$stars <- ifelse(gtp_plot$p_value.1<=.05 & 
                           gtp_plot$t>0, "+", " " ) # add one-tailed for post-treatment periods


# Figure
p <- ggplot(gtp_plot[gtp_plot$t<14+15,], aes(x=t, y=CACE))
p + facet_grid(sample~.) + 
  geom_rect(data=NULL,aes(xmin=0, xmax=4, ymin=-Inf, ymax=Inf),
            fill="gray80", color="gray80") +
  geom_hline(aes(yintercept=0), size=.5, linetype="dashed", color="gray60") +
  #geom_text(aes(x = t, y = (upper + .02), label = stars), size = 5) + 
  geom_errorbar(aes(x=t,
                    ymin=lower, 
                    ymax=upper), 
                width=.1, size=.8, position = position_dodge(width = 0.5)) +
  geom_point(size=2.5, position = position_dodge(width = 0.5)) +
  xlab("Payments Since Tax Holiday") +
  ylab("Paid on Time (CACE)") +
  geom_vline(aes(xintercept=0), size=.7) +
  theme_bw() +
  scale_colour_manual(values = c("black","blue")) +
  scale_alpha_manual(values = c("FALSE"=0.8, "TRUE"=1), guide='none') +
  scale_x_discrete(limit = c(-10:0,4:28),
                   labels = as.character(-10:25)) + 
  theme(plot.title = element_text(size = rel(1.75)),
        axis.text.x = element_text(size = rel(1.1), hjust=.7),
        axis.text.y = element_text(size = rel(1.25)),
        axis.title.y = element_text(size = rel(1.3)),
        axis.title.x = element_text(size = rel(1.3)),
        legend.text = element_text(size = rel(1.2)),
        strip.text.x = element_text(size = rel(1.4)),
        strip.text.y = element_text(size = rel(1.4)),
        strip.background = element_rect(size = 1.5),
        legend.position = "none",
        legend.title=element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black"))

##############################################
message("APPENDIX: Figure A8")

# The Stock of Habit: Perfect vs. Imperfect Past Compliers
# Full Post Treatment Period


taxpayer_type <- ddply(gtp_taxes[gtp_taxes$t_st<=0 & gtp_taxes$t_st>-15,], 
                       "CUENTA", summarise,
                       type = mean(en_fecha, na.rm=T))

gtp_taxes <- merge(gtp_taxes, taxpayer_type, by="CUENTA", all.x=T) 
t <- unique(gtp_taxes$t_st)
t <- t[order(t)]
t <- t[t>-11 & t<=21]
gtp_plot <- NULL

# Imperfect Past Complier
for (i in 1:length(t)){
  
  temp <- gtp_taxes[gtp_taxes$t_st == t[i], ]
  
  if (nrow(temp[temp$TREATMENT==1 & temp$type!=1,])==0) next
  if (nrow(temp[temp$TREATMENT==0 & temp$type!=1,])==0) next
  
  on_time_marginal <- robust.se(ivreg(en_fecha ~ TIENE_EXO, ~ TREATMENT, 
                                      data=temp[temp$type!=1,]))[2,1:2]
  
  
  gtp_plot <- rbind(gtp_plot, 
                    as.vector(c("Paid on Time", t[i], 
                                on_time_marginal, "Imperfect Past Complier")))
  print(i)
}

# Perfect Past Complier
for (i in 1:length(t)){
  
  temp <- gtp_taxes[gtp_taxes$t_st == t[i], ]
  
  if (nrow(temp[temp$TREATMENT==1 & temp$type==1,])==0) next
  if (nrow(temp[temp$TREATMENT==0 & temp$type==1,])==0) next
  
  on_time_compliant <- robust.se(ivreg(en_fecha ~ TIENE_EXO, ~ TREATMENT, 
                                       data=temp[temp$type==1,]))[2,1:2]
  
  
  gtp_plot <- rbind(gtp_plot, 
                    as.vector(c("Paid on Time", t[i], 
                                on_time_compliant, "Perfect Past Complier")))
  print(i)
}

gtp_plot <- as.data.frame(gtp_plot)
names(gtp_plot) <- c("outcome", "t", "CACE", "SE", "sample")

gtp_plot$t <- as.numeric(as.character(gtp_plot$t))
gtp_plot$CACE <- as.numeric(as.character(gtp_plot$CACE))
gtp_plot$SE <- as.numeric(as.character(gtp_plot$SE))

# Difference
diff.hte <- NULL

for (i in 1:length(t)){
  
  temp <- gtp_plot[gtp_plot$t==t[i],]
  
  if (nrow(temp)!=2) next 
  
  diff <- temp$CACE[temp$sample=="Imperfect Past Complier"] - temp$CACE[temp$sample=="Perfect Past Complier"]
  
  SE <- sqrt((temp$SE[temp$sample=="Imperfect Past Complier"])^2 + (temp$CACE[temp$sample=="Perfect Past Complier"])^2)
  
  diff.hte <- rbind.data.frame(diff.hte, as.vector(c(t[i], diff, SE)))
  
  print(i)
}


names(diff.hte) <- c("t", "CACE", "SE")
diff.hte3 <- diff.hte
diff.hte$sample <- "Difference"
diff.hte$outcome <- "Paid on Time"

gtp_plot <- rbind.data.frame(gtp_plot, diff.hte)

gtp_plot$upper <- gtp_plot$CACE + qnorm(.975) * gtp_plot$SE 
gtp_plot$lower <- gtp_plot$CACE - qnorm(.975) * gtp_plot$SE

gtp_plot$sample  <- as.factor(gtp_plot$sample)
gtp_plot$sample  <- relevel(gtp_plot$sample, ref= "Perfect Past Complier")
gtp_plot$sample  <- relevel(gtp_plot$sample, ref= "Imperfect Past Complier")

gtp_plot$upper <- gtp_plot$CACE + qnorm(.975) * gtp_plot$SE 
gtp_plot$lower <- gtp_plot$CACE - qnorm(.975) * gtp_plot$SE


# Calculate the p-value for the difference (one and two-tailed tests)

gtp_plot$t.stat <- gtp_plot$CACE/gtp_plot$SE
gtp_plot$p_value.2 <- 2 * (1 - pnorm(abs(gtp_plot$t.stat)))  # Two-tailed test
gtp_plot$p_value.1 <- (1 - pnorm(abs(gtp_plot$t.stat)))  # One-tailed test
gtp_plot$stars <- ifelse(gtp_plot$p_value.1<=.05 &
                           gtp_plot$t>0, "+", " ") # add one-tailed for post-treatment periods


# Figure

p <- ggplot(gtp_plot[gtp_plot$t<22,], aes(x=t, y=CACE))
p + facet_grid(sample ~.) + 
  geom_rect(data=NULL,aes(xmin=0, xmax=4, ymin=-Inf, ymax=Inf),
            fill="gray80", color="gray80") +
  geom_hline(aes(yintercept=0), size=.5, linetype="dashed", color="gray60") +
  #geom_text(aes(x = t, y = (upper + .02), label = stars), size = 5) + 
  geom_errorbar(aes(x=t,
                    ymin=lower, 
                    ymax=upper), 
                width=.1, size=.8, position = position_dodge(width = 0.5)) +
  geom_point(size=2.5, position = position_dodge(width = 0.5)) +
  xlab("Payments Since Tax Holiday") +
  ylab("Paid on Time (CACE)") +
  geom_vline(aes(xintercept=0), size=.7) +
  theme_bw() +
  scale_colour_manual(values = c("black","blue")) +
  scale_alpha_manual(values = c("FALSE"=0.8, "TRUE"=1), guide='none') +
  scale_x_discrete(limit = c(-10:0,4:21),
                   labels = as.character(-10:18)) + 
  theme(plot.title = element_text(size = rel(1.75)),
        axis.text.x = element_text(size = rel(1.1), hjust=.7),
        axis.text.y = element_text(size = rel(1.25)),
        axis.title.y = element_text(size = rel(1.3)),
        axis.title.x = element_text(size = rel(1.3)),
        legend.text = element_text(size = rel(1.2)),
        strip.text.x = element_text(size = rel(1.4)),
        strip.text.y = element_text(size = rel(1.4)),
        strip.background = element_rect(size = 1.5),
        legend.position = "none",
        legend.title=element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black"))







##############################################
message("APPENDIX: Figure A9")

# Natural Experiment: Heterogeneous Treatment Effects by Type of Tax.

gtp_taxes <- taxes_panel[taxes_panel$ES_BP==1, ]

t <- unique(gtp_taxes$t_st[gtp_taxes$TREATMENT==1])
t <- t[order(t)]
t <- t[t>-11 & t<24]
taxes <- as.character(unique(taxes_panel$TRIBUTO))
gtp_plot <- NULL

#CACE estimation for all periods
for (i in 1:length(t)){
  
  for (j in 1:4){
    
    temp <- gtp_taxes[gtp_taxes$t_st == t[i] & gtp_taxes$TRIBUTO==taxes[j], ]
    temp$en_fecha[temp$TREATMENT==1]
    
    if (nrow(temp[temp$TREATMENT==1,])==0) next
    if (nrow(temp[temp$TREATMENT==0,])==0) next
    
    ivest <- tidy(iv_robust(en_fecha ~ TIENE_EXO | TREATMENT, 
                            data=temp))
    on_time <- c(ivest$estimate[2], ivest$std.error[2])
    
    ivest <- tidy(iv_robust(nr_paymntsowed ~ TIENE_EXO | TREATMENT, 
                            data=temp))
    bills_owed <- c(ivest$estimate[2], ivest$std.error[2])
    
    
    gtp_plot <- rbind(gtp_plot, 
                      as.vector(c("Paid on Time", t[i], on_time, taxes[j])),
                      as.vector(c("Nr. of Bills Owed", t[i], bills_owed, taxes[j]))
    )
    
  }
}

gtp_plot <- as.data.frame(gtp_plot)
names(gtp_plot) <- c("outcome", "t", "CACE", "SE", "tax")

gtp_plot$t <- as.numeric(as.character(gtp_plot$t))
gtp_plot <- gtp_plot[!(gtp_plot$tax=="Patente de Rodados" &
                         gtp_plot$t>15),]
gtp_plot <- gtp_plot[!((gtp_plot$tax=="Contribucion Inmobiliaria" |
                          gtp_plot$tax=="Patente de Rodados") &
                         gtp_plot$t>25),]

gtp_plot$CACE <- as.numeric(as.character(gtp_plot$CACE))
gtp_plot$SE <- as.numeric(as.character(gtp_plot$SE))
gtp_plot$upper <- gtp_plot$CACE + qnorm(.975) * gtp_plot$SE 
gtp_plot$lower <- gtp_plot$CACE - qnorm(.975) * gtp_plot$SE

gtp_plot$tax <- as.factor(gtp_plot$tax)
levels(gtp_plot$tax) <- c("Property", "Vehicle", "Sewage", "Head")
gtp_plot$t_label <- ifelse(gtp_plot$t <= 0, as.character(gtp_plot$t),
                           ifelse(gtp_plot$t %in% 1:3, NA, as.character(gtp_plot$t - 3)))


#Figure
ggplot(gtp_plot[gtp_plot$outcome=="Paid on Time",], aes(x=t, y=CACE)) +
  facet_grid(tax~.) + 
  geom_rect(data=NULL,aes(xmin=0, xmax=4, ymin=-Inf, ymax=Inf),
            fill="gray80", color="gray80") +
  geom_errorbar(aes(x=t,
                    ymin=lower, 
                    ymax=upper), 
                width=.6, size=.8, position = position_dodge(width = 0.5)) +
  geom_point(size=2.5, position = position_dodge(width = 0.5)) +
  xlab("Payments Since Tax Holiday") +
  ylab("Paid on Time (CACE)") +
  geom_vline(aes(xintercept=0), size=.7) +
  geom_hline(aes(yintercept=0), size=.5, linetype="dashed") +
  theme_bw() +
  scale_x_continuous(breaks = unique(gtp_plot$t),
                     labels = unique(gtp_plot$t_label)) +
  scale_colour_manual(values = c("black","blue")) +
  theme(plot.title = element_text(size = rel(1.75)),
        axis.text.x = element_text(size = rel(1.1), hjust=.7),
        axis.text.y = element_text(size = rel(1.25)),
        axis.title.y = element_text(size = rel(1.3)),
        axis.title.x = element_text(size = rel(1.3)),
        legend.text = element_text(size = rel(1.2)),
        strip.text.x = element_text(size = rel(1.4)),
        strip.text.y = element_text(size = rel(1.4)),
        strip.background = element_rect(size = 1.5),
        legend.position = "none",
        legend.title=element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black"))

##############################################
message("APPENDIX: Figure A10")

# Natural Experiment: Heterogeneous Treatment Effects by Property Value.

gtp_taxes <- taxes_panel[taxes_panel$ES_BP==1 & 
                           taxes_panel$TRIBUTO=="Contribucion Inmobiliaria", ]

gtp_taxes$high_propvalue <- ifelse(gtp_taxes$VALOR_CAT2004 > 
                                     median(gtp_taxes$VALOR_CAT2004, na.rm=T),
                                   1, 0)

t <- unique(gtp_taxes$t_st[gtp_taxes$TREATMENT==1])
t <- t[order(t)]
t <- t[t>-11 & t<24]
pay_month <- as.character(unique(gtp_taxes$bill_month))
gtp_plot <- NULL

gtp_taxes <- gtp_taxes[!is.na(gtp_taxes$high_propvalue),]

#CACE estimation for all periods
for (i in 1:length(t)){
  
  for (j in c(1,0)){
    
    temp <- gtp_taxes[gtp_taxes$t_st == t[i] & 
                        gtp_taxes$high_propvalue==j, ]
    
    if (nrow(temp[temp$TREATMENT==1,])==0) next
    if (nrow(temp[temp$TREATMENT==0,])==0) next
    
    ivest <- tidy(iv_robust(en_fecha ~ TIENE_EXO | TREATMENT, 
                            data=temp))
    on_time  <- c(ivest$estimate[2], ivest$std.error[2])
    
    gtp_plot <- rbind(gtp_plot, 
                      as.vector(c("Paid on Time", t[i], on_time, j)))
    
    
  }
  
}

gtp_plot <- as.data.frame(gtp_plot)
names(gtp_plot) <- c("outcome", "t", "CACE", "SE", "prop_value")

gtp_plot$t <- as.numeric(as.character(gtp_plot$t))

gtp_plot$CACE <- as.numeric(as.character(gtp_plot$CACE))
gtp_plot$SE <- as.numeric(as.character(gtp_plot$SE))
gtp_plot$upper <- gtp_plot$CACE + qnorm(.975) * gtp_plot$SE 
gtp_plot$lower <- gtp_plot$CACE - qnorm(.975) * gtp_plot$SE

# Difference
low<-gtp_plot[gtp_plot$prop_value==0,] 
high<-gtp_plot[gtp_plot$prop_value==1,]

diff<-data.frame(matrix(ncol=7))
colnames(diff)<-c("outcome","t","CACE","SE","prop_value","upper","lower")

t=1
for (i in unique(gtp_plot$t)){
  
  aux_1<-low[low$t==i,]
  aux_0<-high[high$t==i,]
  
  # Estimate
  CACE<-aux_1$CACE-aux_0$CACE
  # Calculate the standard error of the difference between the means
  se_diff <- sqrt((aux_1$SE^2) + (aux_0$SE^2))
  # calculate upper and lower CI
  upper <- CACE + 1.96*se_diff
  lower <- CACE - 1.96*se_diff
  
  diff[t,'CACE']<-CACE
  diff[t,'SE']<-se_diff
  diff[t,'upper']<-upper
  diff[t,'lower']<-lower
  diff[t,'t']<-paste0(i)
  diff[t,'prop_value']<-'Difference in Effects'
  diff[t,'outcome']<-'Paid on Time'
  t<-t+1
}
rm(aux_1,aux_0,low,high)

# Add to data
gtp_plot<-rbind(gtp_plot,diff);rm(diff)

gtp_plot$t <- as.numeric(as.character(gtp_plot$t))
gtp_plot$t_label <- ifelse(gtp_plot$t <= 0, as.character(gtp_plot$t),
                           ifelse(gtp_plot$t %in% 1:3, NA, as.character(gtp_plot$t - 3)))
gtp_plot$prop_value <- ifelse(gtp_plot$prop_value=='0',"Low Property Value",gtp_plot$prop_value)
gtp_plot$prop_value <- ifelse(gtp_plot$prop_value=='1',"High Property Value",gtp_plot$prop_value)


gtp_plot$prop_value<-forcats::fct_relevel(gtp_plot$prop_value, "Low Property Value","High Property Value", 'Difference in Effects')
gtp_plot$t_label <- ifelse(gtp_plot$t <= 0, as.character(gtp_plot$t),
                           ifelse(gtp_plot$t %in% 1:3, NA, as.character(gtp_plot$t - 3)))

#Figure A
fig_a<-ggplot(gtp_plot[gtp_plot$prop_value %in% c("Low Property Value","High Property Value"),], aes(x=t, y=CACE)) +
  facet_grid(prop_value ~ .) + 
  geom_rect(data=NULL,aes(xmin=0, xmax=4, ymin=-Inf, ymax=Inf),
            fill="gray80", color="gray80") +
  geom_errorbar(aes(x=t,
                    ymin=lower, 
                    ymax=upper), 
                width=.6, size=.8, position = position_dodge(width = 0.5)) +
  geom_point(size=2.5, position = position_dodge(width = 0.5)) +
  #xlab("Payments Since Tax Holiday") +
  ylab("Paid on Time (CACE)") +
  scale_x_continuous(breaks = unique(gtp_plot$t),
                     labels = unique(gtp_plot$t_label)) +
  geom_vline(aes(xintercept=0), size=.7) +
  geom_hline(aes(yintercept=0), size=.5, linetype="dashed") +
  theme_bw() +
  scale_colour_manual(values = c("black","blue")) +
  theme(plot.title = element_text(size = rel(1.75)),
        axis.text.x = element_text(size = rel(1.1), hjust=.7),
        axis.text.y = element_text(size = rel(1.25)),
        axis.title.y = element_text(size = rel(1.3)),
        axis.title.x = element_text(size = rel(1.3)),
        legend.text = element_text(size = rel(1.2)),
        strip.text.x = element_text(size = rel(1.4)),
        strip.text.y = element_text(size = rel(1.4)),
        strip.background = element_rect(size = 1.5),
        legend.position = "none",
        legend.title=element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black"))

#Figure B
fig_b<-ggplot(gtp_plot[gtp_plot$prop_value %in% c('Difference in Effects'),], aes(x=t, y=CACE)) +
  facet_grid(prop_value ~ .) + 
  geom_rect(data=NULL,aes(xmin=0, xmax=4, ymin=-Inf, ymax=Inf),
            fill="gray80", color="gray80") +
  geom_errorbar(aes(x=t,
                    ymin=lower, 
                    ymax=upper), 
                width=.6, size=.8, position = position_dodge(width = 0.5)) +
  geom_point(size=2.5, position = position_dodge(width = 0.5)) +
  xlab("Payments Since Tax Holiday") +
  ylab("Paid on Time (CACE)") +
  scale_x_continuous(breaks = unique(gtp_plot$t),
                     labels = unique(gtp_plot$t_label)) +
  geom_vline(aes(xintercept=0), size=.7) +
  geom_hline(aes(yintercept=0), size=.5, linetype="dashed") +
  theme_bw() +
  scale_colour_manual(values = c("black","blue")) +
  theme(plot.title = element_text(size = rel(1.75)),
        axis.text.x = element_text(size = rel(1.1), hjust=.7),
        axis.text.y = element_text(size = rel(1.25)),
        axis.title.y = element_text(size = rel(1.3)),
        axis.title.x = element_text(size = rel(1.3)),
        legend.text = element_text(size = rel(1.2)),
        strip.text.x = element_text(size = rel(1.4)),
        strip.text.y = element_text(size = rel(1.4)),
        strip.background = element_rect(size = 1.5),
        legend.position = "none",
        legend.title=element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black"))

#Final figure
fig<-ggarrange(fig_a,fig_b,ncol = 1,nrow = 2,labels = c('A','B'),heights = c(2, 1))
fig

##############################################
message("APPENDIX TABLE A3")

# Multiple comparison adjustments for the Natural Experiment

# Main test: Diff-in-diff (three outcomes): 1 year DiD for all taxes pooled

# rescaling the time variable to account for the taxes that have twice as 
# many payments per year
taxes_panel$t_st_2 <- ifelse(taxes_panel$tax=="Sewage" | taxes_panel$tax=="Head", 
                             taxes_panel$t_st/2, taxes_panel$t_st)


# 1 year diff in diff setup
dd_data <-  rbind.data.frame(taxes_panel %>% filter(ES_BP==1) %>%
                               group_by(CUENTA, TRIBUTO, TREATMENT) %>% dplyr::summarise(
                                 compliance_mean_DiD_1yr =
                                   mean(compliance[t_st>3 & t_st<=6], na.rm=T) - 
                                   mean(compliance[t_st<0 & t_st>=(-3)], na.rm=T),
                                 missed_payment_mean_DiD_1yr =
                                   mean(missed_payment[t_st>3 & t_st<=6], na.rm=T)-
                                   mean(missed_payment[t_st<0 & t_st>=(-3)], na.rm=T),
                                 nr_missed_payments_mean_DiD_1yr =
                                   mean(nr_paymntsowed[t_st> 3 & t_st<=6], na.rm=T)-
                                   mean(nr_paymntsowed[t_st<0 & t_st>=(-3)], na.rm=T),
                                 compliance_mean_DiD_1yr.yr2 =
                                   mean(compliance[t_st>6 & t_st<=9], na.rm=T) - 
                                   mean(compliance[t_st<0 & t_st>=(-3)], na.rm=T),
                                 missed_payment_mean_DiD_1yr.yr2 =
                                   mean(missed_payment[t_st>6 & t_st<=9], na.rm=T)-
                                   mean(missed_payment[t_st<0 & t_st>=(-3)], na.rm=T),
                                 nr_missed_payments_mean_DiD_1yr.yr2 =
                                   mean(nr_paymntsowed[t_st> 6 & t_st<=9], na.rm=T)-
                                   mean(nr_paymntsowed[t_st<0 & t_st>=(-3)], na.rm=T)
                               )
)


DiD_1yr_compliance <- difference_in_means(compliance_mean_DiD_1yr ~ TREATMENT, 
                                          data = dd_data)
DiD_1yr_compliance
DiD_1yr_missed_payment <- difference_in_means(missed_payment_mean_DiD_1yr ~ TREATMENT, 
                                              data = dd_data)
DiD_1yr_missed_payment
DiD_1yr_nrmissed_payments <- difference_in_means(nr_missed_payments_mean_DiD_1yr ~ TREATMENT,
                                                 data = dd_data)
DiD_1yr_nrmissed_payments 

main <- rbind.data.frame(c("Main 1yr DiD - Missed Payment", DiD_1yr_missed_payment$p.value),
                         c("Main 1yr DiD - Nr Missed Payments", DiD_1yr_nrmissed_payments$p.value),
                         c("Main 1yr DiD - Compliance", DiD_1yr_compliance$p.value))

# Persistence of effects, heterogeneous effects (three outcomes) 
# Difference between years 1 and 2 for the three outcomes.

persistence_missed <- comp.eff(difference_in_means(missed_payment_mean_DiD_1yr ~ TREATMENT,
                                                   data = dd_data),
                               difference_in_means(missed_payment_mean_DiD_1yr.yr2 ~ TREATMENT,
                                                   data = dd_data))

persistence_nrowed <- comp.eff(difference_in_means(nr_missed_payments_mean_DiD_1yr ~ TREATMENT,
                                                   data = dd_data),
                               difference_in_means(nr_missed_payments_mean_DiD_1yr.yr2 ~ TREATMENT,
                                                   data = dd_data))

persistence_compliance <- comp.eff(difference_in_means(compliance_mean_DiD_1yr ~ TREATMENT,
                                                       data = dd_data),
                                   difference_in_means(compliance_mean_DiD_1yr.yr2 ~ TREATMENT,
                                                       data = dd_data))

persistence <- rbind.data.frame(c("Persistence yr 1 vs 2  - Missed Payment", persistence_missed[4]),
                                c("Persistence yr 1 vs 2  - Nr Payments Missed", persistence_nrowed[4]),
                                c("Persistence yr 1 vs 2 - Compliance", persistence_compliance[4])
)



# Heterogeneous effects, by cost of payment

# 1 year diff in diff setup
dd_data_vc <-  rbind.data.frame(taxes_panel %>% filter(ES_BP==1 & TRIBUTO=="Contribucion Inmobiliaria") %>%
                                  group_by(CUENTA, TREATMENT, VALOR_CAT2004) %>% dplyr::summarise(
                                    compliance_mean_DiD_1yr =
                                      mean(compliance[t_st>3 & t_st<=6], na.rm=T) - 
                                      mean(compliance[t_st<0 & t_st>=(-3)], na.rm=T),
                                    missed_payment_mean_DiD_1yr =
                                      mean(missed_payment[t_st>3 & t_st<=6], na.rm=T)-
                                      mean(missed_payment[t_st<0 & t_st>=(-3)], na.rm=T),
                                    nr_missed_payments_mean_DiD_1yr =
                                      mean(nr_paymntsowed[t_st> 3 & t_st<=6], na.rm=T)-
                                      mean(nr_paymntsowed[t_st<0 & t_st>=(-3)], na.rm=T)
                                  )
)

dd_data_vc$high_propvalue <- ifelse(dd_data_vc$VALOR_CAT2004 > 
                                      median(dd_data_vc$VALOR_CAT2004, na.rm=T),
                                    1, 0)

income_missed <- comp.eff(difference_in_means(missed_payment_mean_DiD_1yr ~ TREATMENT, 
                                              data = filter(dd_data_vc, high_propvalue == 1)),
                          difference_in_means(missed_payment_mean_DiD_1yr ~ TREATMENT, 
                                              data = filter(dd_data_vc, high_propvalue == 0)))


income_nrmissed <- comp.eff(difference_in_means(nr_missed_payments_mean_DiD_1yr ~ TREATMENT, 
                                                data = filter(dd_data_vc, high_propvalue == 1)),
                            difference_in_means(nr_missed_payments_mean_DiD_1yr ~ TREATMENT, 
                                                data = filter(dd_data_vc, high_propvalue == 0)))


income_compliance <- comp.eff(difference_in_means(compliance_mean_DiD_1yr ~ TREATMENT, 
                                                  data = filter(dd_data_vc, high_propvalue == 1)),
                              difference_in_means(compliance_mean_DiD_1yr ~ TREATMENT, 
                                                  data = filter(dd_data_vc, high_propvalue == 0)))



income <- rbind.data.frame(c("Income HTE - Missed Payment", income_missed[4]),
                           c("Income HTE - Nr Payments Missed", income_nrmissed[4]),
                           c("Income HTE - Compliance", income_compliance[4]))


# Comparing natural and field experiments
natvsfield <- comp.eff(difference_in_means(missed_payment ~ TREATMENT, 
                                           data = filter(taxes_panel, ES_BP==1 & t_st==4)),
                       difference_in_means(JUL_2014_ontime ~ pooled_124_0, weights = pooled_124_0_wts,
                                           data = filter(fieldex, type=="good taxpayer")))

natvsfield <- rbind.data.frame(c("Natural vs Field Experiment", natvsfield[4]))


names(main) <- names(persistence) <- names(income) <- names(natvsfield) <- c("H","p")
all <- rbind(main, persistence, income, natvsfield)
all$p <- as.numeric(all$p)
all$p.bonferroni <- p.adjust(all$p, method = "bonferroni")
all$p.fdr <- p.adjust(all$p, method = "fdr")

# Round the numeric columns to 3 decimal places
all[, 2:4] <- round(all[, 2:4], 5)

#Table
all



##############################################
message('APPENDIX: Figure A18')

# Field Experiment: Geographic Distribution of 
# Eligible and Ineligible Taxpayers


# Map
map<-ggplot() +
  geom_sf(data = shp,aes(fill=PORC_NBI,  geometry = geometry), colour = NA)+
  scale_fill_gradient(low = "grey90", high = "grey50",name = "PORC_NBI") +
  geom_sf(data = datafx_map[datafx_map$ES_BP==1,], aes(geometry = geometry),color='green',fill='green')+
  geom_sf(data = datafx_map[datafx_map$ES_BP==0,], aes(geometry = geometry),color='blue',fill='blue') +
  theme_classic()+
  theme(plot.background = element_rect(fill = "white"),
        panel.background = element_rect(colour = "white",),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        axis.line = element_blank(),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        axis.title = element_blank(),
        legend.position = 'none')+
  coord_sf()

map

##############################################
message("APPENDIX TABLE A4")

# Field Experiment: Pre-Treatment Balance

out <- cbind.data.frame(
  c("MAR_2010_ontime", "JUL_2010_ontime", "NOV_2010_ontime", "MAR_2011_ontime", 
    "JUL_2011_ontime", "NOV_2011_ontime", "MAR_2012_ontime", "JUL_2012_ontime", 
    "NOV_2012_ontime", "MAR_2013_ontime", "JUL_2013_ontime", "NOV_2013_ontime", 
    "MAR_2014_ontime"), 
  c("Paid on Time MAR 2010", "Paid on Time JUL 2010", "Paid on Time NOV 2010", 
    "Paid on Time MAR 2011", "Paid on Time JUL 2011", "Paid on Time NOV 2011", 
    "Paid on Time MAR 2012", "Paid on Time JUL 2012", "Paid on Time NOV 2012", 
    "Paid on Time MAR 2013", "Paid on Time JUL 2013", "Paid on Time NOV 2013", 
    "Paid on Time MAR 2014")
)

treat <- rbind.data.frame(
  c("pooled_124_6", "pooled_124_6_wts", "Treatment versus Pure Control"),
  c("pooled_124_0", "pooled_124_0_wts", "Treatment versus Placebo"),
  c("pooled_0_6", "pooled_0_6_wts", "Placebo versus Pure Control")
)

bal <- merge(treat, out)
names(bal) <- c("treat_var", "wts", "treat_label", "out_var", "out_label")

balance <- NULL
for (i in 1:nrow(bal)){
  
  out <- fieldex[,bal$out_var[i]]
  treat <- fieldex[,bal$treat_var[i]]
  wts <- fieldex[,bal$wts[i]]
  balance <- rbind.data.frame(balance, difference_in_means(out ~ treat, 
                                                           weights = wts))
  rm(out, treat, wts)
}

balance <- cbind(bal[,c("treat_label", "out_label")], 
                 balance[,c("coefficients", "std.error", "nobs", "p.value")])

#Table
balance


##############################################
message("APPENDIX TABLE A5")

#Field Experiment: Treatment Conditions and Sample 
## Sizes (Full Experimental Design)


#Table by type
tab_1<-as.data.frame(table(fieldex$treatment, fieldex$type)) %>% 
  pivot_wider(.,names_from = "Var2",values_from = "Freq") %>% 
  mutate(Var1= as.character(Var1),
         Var1=case_when(Var1=='0'~'Reminder of Taxes Due (Placebo Control)',
                        Var1=='1'~'Reminder + Lottery/Individual Reward',
                        Var1=='2'~ 'Reminder + Lottery/Individual Reward + Probability of
Winning',
                        Var1=='3'~'Reminder + Individual Punishment',
                        Var1=='4'~'Reminder + Lottery + Social Benefit',
                        Var1=='5'~'Reminder + Social Punishment',
                        Var1=='6'~'Pure Control'),
         treatment_condition=Var1) %>% 
  select(-Var1)

#Table total N
tab_2<-as.data.frame(table(fieldex$type)) %>% 
  mutate(treatment_condition='TOTAL N') %>% 
  pivot_wider(.,names_from = "Var1",values_from = "Freq")

#Final table
tab<-rbind(tab_1,tab_2);rm(tab_1,tab_2)

tab[c(7,1,2,3,4,5,6,8),c(3,1,2)]

##############################################
message("APPENDIX FIGURE A19")

# Field Experiment: Complete Results.

# Filter into eligible and ineligible datasets
fieldexE <- fieldex %>% filter(type=="good taxpayer")
fieldexN <- fieldex %>% filter(type=="bad taxpayer")

## Eligibles
web <- as.data.frame(rbind(
  with(fieldexE[(fieldexE$treatment==6 | fieldexE$treatment==0),],
       ttest(JUL_2014_WEBACCESS,(treatment==0)))[c(3:4,8)],
  with(fieldexE[(fieldexE$treatment==6 | fieldexE$treatment==1),],
       ttest(JUL_2014_WEBACCESS,(treatment==1)))[c(3:4,8)],
  with(fieldexE[(fieldexE$treatment==6 | fieldexE$treatment==3),],
       ttest(JUL_2014_WEBACCESS,(treatment==3)))[c(3:4,8)],
  with(fieldexE[(fieldexE$treatment==6 | fieldexE$treatment==4),],
       ttest(JUL_2014_WEBACCESS,(treatment==4)))[c(3:4,8)],
  with(fieldexE[(fieldexE$treatment==6 | fieldexE$treatment==5),],
       ttest(JUL_2014_WEBACCESS,(treatment==5)))[c(3:4,8)]))
web$outcome <- "Intended Compliance (Accessed Web Account)"

missed <- as.data.frame(rbind(
  with(fieldexE[(fieldexE$treatment==6 | fieldexE$treatment==0),],
       ttest(JUL_2014_ontime,(treatment==0)))[c(3:4,8)],
  with(fieldexE[(fieldexE$treatment==6 | fieldexE$treatment==1),],
       ttest(JUL_2014_ontime,(treatment==1)))[c(3:4,8)],
  with(fieldexE[(fieldexE$treatment==6 | fieldexE$treatment==3),],
       ttest(JUL_2014_ontime,(treatment==3)))[c(3:4,8)],
  with(fieldexE[(fieldexE$treatment==6 | fieldexE$treatment==4),],
       ttest(JUL_2014_ontime,(treatment==4)))[c(3:4,8)],
  with(fieldexE[(fieldexE$treatment==6 | fieldexE$treatment==5),],
       ttest(JUL_2014_ontime,(treatment==5)))[c(3:4,8)]))
missed$outcome <- "Paid Bill On Time"

fieldex_plotE <- rbind(missed, web)
fieldex_plotE$type <- "Eligible Taxpayers"

## Noneligibles
web <- as.data.frame(rbind(
  with(fieldexN[(fieldexN$treatment==6 | fieldexN$treatment==0),],
       ttest(JUL_2014_WEBACCESS,(treatment==0)))[c(3:4,8)],
  with(fieldexN[(fieldexN$treatment==6 | fieldexN$treatment==1),],
       ttest(JUL_2014_WEBACCESS,(treatment==1)))[c(3:4,8)],
  with(fieldexN[(fieldexN$treatment==6 | fieldexN$treatment==3),],
       ttest(JUL_2014_WEBACCESS,(treatment==3)))[c(3:4,8)],
  with(fieldexN[(fieldexN$treatment==6 | fieldexN$treatment==4),],
       ttest(JUL_2014_WEBACCESS,(treatment==4)))[c(3:4,8)],
  with(fieldexN[(fieldexN$treatment==6 | fieldexN$treatment==5),],
       ttest(JUL_2014_WEBACCESS,(treatment==5)))[c(3:4,8)]))
web$outcome <- "Intended Compliance (Accessed Web Account)"

missed <- as.data.frame(rbind(
  with(fieldexN[(fieldexN$treatment==6 | fieldexN$treatment==0),],
       ttest(JUL_2014_ontime,(treatment==0)))[c(3:4,8)],
  with(fieldexN[(fieldexN$treatment==6 | fieldexN$treatment==1),],
       ttest(JUL_2014_ontime,(treatment==1)))[c(3:4,8)],
  with(fieldexN[(fieldexN$treatment==6 | fieldexN$treatment==3),],
       ttest(JUL_2014_ontime,(treatment==3)))[c(3:4,8)],
  with(fieldexN[(fieldexN$treatment==6 | fieldexN$treatment==4),],
       ttest(JUL_2014_ontime,(treatment==4)))[c(3:4,8)],
  with(fieldexN[(fieldexN$treatment==6 | fieldexN$treatment==5),],
       ttest(JUL_2014_ontime,(treatment==5)))[c(3:4,8)]))
missed$outcome <- "Paid Bill On Time"

fieldex_plotN <- rbind(missed, web)
fieldex_plotN$type <- "Ineligible Taxpayers"

fieldex_plot <- rbind(fieldex_plotE,fieldex_plotN)

names(fieldex_plot) <- c("mean", "se", "p-value", "outcome", "type")

fieldex_plot$upper <- fieldex_plot$mean + qnorm(.975)*(fieldex_plot$se)
fieldex_plot$lower <- fieldex_plot$mean - qnorm(.975)*(fieldex_plot$se)

fieldex_plot$treatment <- rep(c("Reminder", 
                                "Individual\n Reward", 
                                "Individual\n Punishment",
                                "Social\n Reward",
                                "Social\n Punishment"), 4)

fieldex_plot$treatment <- as.factor(fieldex_plot$treatment)
fieldex_plot$treatment <- factor(fieldex_plot$treatment,
                                 levels = c("Reminder", "Individual\n Reward",
                                            "Individual\n Punishment",
                                            "Social\n Reward",
                                            "Social\n Punishment"))
class(fieldex_plot$treatment)
fieldex_plot$type <- as.factor(fieldex_plot$type)



# FDR and Bonferroni corrections

#Threshold for FDR correction 
# get and order the nominal p-values
ordered.ps <- fieldex_plot$`p-value`[order(fieldex_plot$`p-value`,decreasing=F)]  
ordered.ps

comp <- (1:length(ordered.ps)/length(ordered.ps))*(.05)

FDR <- cbind(ordered.ps,comp,ordered.ps<=comp)
fdr <- max(FDR[,1][FDR[,3]==1])
fdr

#Threshold for Bonferroni correction
bonf <- 0.05/length(ordered.ps)
bonf


fieldex_plot$Bonf_reject <- NA
fieldex_plot$FDR_reject <- NA

fieldex_plot$Bonf_reject[fieldex_plot$`p-value`<=bonf] <- "yes"
fieldex_plot$Bonf_reject[fieldex_plot$`p-value`>bonf]  <- "no"

fieldex_plot$FDR_reject[fieldex_plot$`p-value`<=fdr] <- "yes"
fieldex_plot$FDR_reject[fieldex_plot$`p-value`>fdr]  <- "no"

fieldex_plot$bonf_fdr <- as.numeric(fieldex_plot$Bonf_reject=="yes" & 
                                      fieldex_plot$FDR_reject=="yes")

pd <- position_dodge(width = 0.4)


#Figure
ggplot(fieldex_plot, aes(x=treatment, y=mean, group=type, 
                         color=type, shape=type)) +
  facet_wrap(~outcome) + 
  geom_errorbar(aes(x=treatment,
                    ymin=lower, ymax=upper), 
                width=.25, size=.9, alpha=.7, 
                position=pd) +
  geom_point(size=4.5, position=pd) +
  geom_point(size=2.5, position=pd, 
             data=fieldex_plot[fieldex_plot$bonf_fdr==0,],
             aes(x=treatment, y=mean, group=type, 
                 shape=type), color = "white") +
  geom_hline(aes(yintercept=0), size=.5, linetype="dashed") +
  xlab(" ") +
  ylab("Difference from the control group") +
  scale_colour_brewer(palette="Set1") +
  theme_minimal() +
  theme(legend.position = "bottom",
        axis.text.x = element_text(angle = 90, hjust = 1)) 


##############################################
message("APPENDIX FIGURE A20")

# Field Experiment: Effects of the Information Treatment 
# versus the Placebo Control (no IPW)

# a) Compliance 
comp_placebo <- rbind.data.frame(
  difference_in_means(JUL_2014_ontime ~ pooled_124_0, data = fieldex),
  difference_in_means(NOV_2014_ontime ~ pooled_124_0, data = fieldex),
  difference_in_means(MAR_2015_ontime ~ pooled_124_0, data = fieldex),
  difference_in_means(JUL_2015_ontime ~ pooled_124_0, data = fieldex),
  difference_in_means(NOV_2015_ontime ~ pooled_124_0, data = fieldex),
  difference_in_means(MAR_2016_ontime ~ pooled_124_0, data = fieldex),
  difference_in_means(JUL_2016_ontime ~ pooled_124_0, data = fieldex),
  difference_in_means(compliance_1416 ~ pooled_124_0, data = fieldex)
)
comp_placebo$outcome <- "Paid on Time"
comp_placebo$control <- "Treatment vs Placebo"

# b) Intended compliance
intcomp_placebo <- rbind.data.frame(
  difference_in_means(JUL_2014_WEBACCESS ~ pooled_124_0, data = fieldex),
  difference_in_means(NOV_2014_WEBACCESS ~ pooled_124_0, data = fieldex),
  difference_in_means(MAR_2015_WEBACCESS ~ pooled_124_0, data = fieldex),
  difference_in_means(JUL_2015_WEBACCESS ~ pooled_124_0, data = fieldex),
  difference_in_means(NOV_2015_WEBACCESS ~ pooled_124_0, data = fieldex),
  difference_in_means(MAR_2016_WEBACCESS ~ pooled_124_0, data = fieldex),
  difference_in_means(JUL_2016_WEBACCESS ~ pooled_124_0, data = fieldex),
  difference_in_means(intended_1416 ~ pooled_124_0, data = fieldex)
)
intcomp_placebo$outcome <- "Web Access"
intcomp_placebo$control <- "Treatment vs Placebo"

## Combine and plot
plotdata <- rbind.data.frame(comp_placebo, intcomp_placebo)
rm(comp_placebo, intcomp_placebo)

plotdata$time <- rep(1:8, 2)
#plotdata <- plotdata[plotdata$time!=8,]

plotdata$control <- as.factor(plotdata$control)

pd <- position_dodge(width = 0.6)

#Figure
ggplot(plotdata, aes(x=time, y=coefficients, group = control, shape = control)) + 
  facet_wrap( ~ outcome) + #, scales="free"
  geom_point(size=4.5, position=pd) +
  geom_hline(aes(yintercept=0), size=.5, linetype="dashed") + 
  geom_errorbar(aes(x=time,
                    ymin=conf.low, 
                    ymax=conf.high), 
                width=.15, size=1, position=pd) +
  xlab("Payments Since Receiving Flyer") + ylab("Average Causal Effect") + 
  theme_bw() +
  scale_colour_manual(values = c("black", "black")) +
  scale_x_continuous(breaks=1:8,
                     labels=c(as.character(1:7), "pooled")) + 
  theme(plot.title = element_text(size = rel(1.2)),
        axis.text.x = element_text(size = rel(1)),
        axis.text.y = element_text(size = rel(1)),
        axis.title.y = element_text(size = rel(1)),
        axis.title.x = element_text(size = rel(1)),
        legend.text = element_text(size = rel(1)),
        strip.text.x = element_text(size = rel(1)),
        strip.text.y = element_text(size = rel(1)),
        legend.position = "bottom",
        legend.title=element_blank()) 




##############################################
message("APPENDIX FIGURE A21")

#Field Experiment: Effects of Information About the 
# Tax Holiday on Compliance. Heterogeneous Treatment Effects by Taxpayer Type.

#### Eligibles

#### TREATMENT VERSUS PURE CONTROL

# a) compliance
comp_controlE <- rbind.data.frame(
  difference_in_means(JUL_2014_ontime ~ pooled_124_6, data = fieldexE),
  difference_in_means(NOV_2014_ontime ~ pooled_124_6, data = fieldexE),
  difference_in_means(MAR_2015_ontime ~ pooled_124_6, data = fieldexE),
  difference_in_means(JUL_2015_ontime ~ pooled_124_6, data = fieldexE),
  difference_in_means(NOV_2015_ontime ~ pooled_124_6, data = fieldexE),
  difference_in_means(MAR_2016_ontime ~ pooled_124_6, data = fieldexE),
  difference_in_means(JUL_2016_ontime ~ pooled_124_6, data = fieldexE),
  difference_in_means(compliance_1416 ~ pooled_124_6, data = fieldexE)
)
comp_controlE$outcome <- "Paid on Time"
comp_controlE$control <- "Treatment vs Pure Control"
comp_controlE$type <- "Eligibles"

# b) Intended compliance
intcomp_controlE <- rbind.data.frame(
  difference_in_means(JUL_2014_WEBACCESS ~ pooled_124_6, data = fieldexE),
  difference_in_means(NOV_2014_WEBACCESS ~ pooled_124_6, data = fieldexE),
  difference_in_means(MAR_2015_WEBACCESS ~ pooled_124_6, data = fieldexE),
  difference_in_means(JUL_2015_WEBACCESS ~ pooled_124_6, data = fieldexE),
  difference_in_means(NOV_2015_WEBACCESS ~ pooled_124_6, data = fieldexE),
  difference_in_means(MAR_2016_WEBACCESS ~ pooled_124_6, data = fieldexE),
  difference_in_means(JUL_2016_WEBACCESS ~ pooled_124_6, data = fieldexE),
  difference_in_means(intended_1416 ~ pooled_124_6, data = fieldexE)
)
intcomp_controlE$control <- "Treatment vs Pure Control"
intcomp_controlE$outcome <- "Web Access"
intcomp_controlE$type <- "Eligibles"

#### TREATMENT VERSUS PLACEBO 

# a) Compliance 
comp_placeboE <- rbind.data.frame(
  difference_in_means(JUL_2014_ontime ~ pooled_124_0, data = fieldexE),
  difference_in_means(NOV_2014_ontime ~ pooled_124_0, data = fieldexE),
  difference_in_means(MAR_2015_ontime ~ pooled_124_0, data = fieldexE),
  difference_in_means(JUL_2015_ontime ~ pooled_124_0, data = fieldexE),
  difference_in_means(NOV_2015_ontime ~ pooled_124_0, data = fieldexE),
  difference_in_means(MAR_2016_ontime ~ pooled_124_0, data = fieldexE),
  difference_in_means(JUL_2016_ontime ~ pooled_124_0, data = fieldexE),
  difference_in_means(compliance_1416 ~ pooled_124_0, data = fieldexE)
)
comp_placeboE$control <- "Treatment vs Placebo"
comp_placeboE$outcome <- "Paid on Time"
comp_placeboE$type <- "Eligibles"

# b) Intended compliance
intcomp_placeboE <- rbind.data.frame(
  difference_in_means(JUL_2014_WEBACCESS ~ pooled_124_0, data = fieldexE),
  difference_in_means(NOV_2014_WEBACCESS ~ pooled_124_0, data = fieldexE),
  difference_in_means(MAR_2015_WEBACCESS ~ pooled_124_0, data = fieldexE),
  difference_in_means(JUL_2015_WEBACCESS ~ pooled_124_0, data = fieldexE),
  difference_in_means(NOV_2015_WEBACCESS ~ pooled_124_0, data = fieldexE),
  difference_in_means(MAR_2016_WEBACCESS ~ pooled_124_0, data = fieldexE),
  difference_in_means(JUL_2016_WEBACCESS ~ pooled_124_0, data = fieldexE),
  difference_in_means(intended_1416 ~ pooled_124_0, data = fieldexE)
)
intcomp_placeboE$outcome <- "Web Access"
intcomp_placeboE$control <- "Treatment vs Placebo"
intcomp_placeboE$type <- "Eligibles"

#### PURE CONTROL VERSUS PLACEBO 

# a) Compliance 
comp_control_plaE <- rbind.data.frame(
  difference_in_means(JUL_2014_ontime ~ pooled_0_6, data = fieldexE),
  difference_in_means(NOV_2014_ontime ~ pooled_0_6, data = fieldexE),
  difference_in_means(MAR_2015_ontime ~ pooled_0_6, data = fieldexE),
  difference_in_means(JUL_2015_ontime ~ pooled_0_6, data = fieldexE),
  difference_in_means(NOV_2015_ontime ~ pooled_0_6, data = fieldexE),
  difference_in_means(MAR_2016_ontime ~ pooled_0_6, data = fieldexE),
  difference_in_means(JUL_2016_ontime ~ pooled_0_6, data = fieldexE),
  difference_in_means(compliance_1416 ~ pooled_0_6, data = fieldexE)
)
comp_control_plaE$outcome <- "Paid on Time"
comp_control_plaE$control <- "Placebo vs Pure Control"
comp_control_plaE$type <- "Eligibles"

# b) Intended compliance
intcomp_control_plaE <- rbind.data.frame(
  difference_in_means(JUL_2014_WEBACCESS ~ pooled_0_6, data = fieldexE),
  difference_in_means(NOV_2014_WEBACCESS ~ pooled_0_6, data = fieldexE),
  difference_in_means(MAR_2015_WEBACCESS ~ pooled_0_6, data = fieldexE),
  difference_in_means(JUL_2015_WEBACCESS ~ pooled_0_6, data = fieldexE),
  difference_in_means(NOV_2015_WEBACCESS ~ pooled_0_6, data = fieldexE),
  difference_in_means(MAR_2016_WEBACCESS ~ pooled_0_6, data = fieldexE),
  difference_in_means(JUL_2016_WEBACCESS ~ pooled_0_6, data = fieldexE),
  difference_in_means(intended_1416 ~ pooled_0_6, data = fieldexE)
)
intcomp_control_plaE$outcome <- "Web Access"
intcomp_control_plaE$control <- "Placebo vs Pure Control"
intcomp_control_plaE$type <- "Eligibles"

#### Ineligibles

#### TREATMENT VERSUS PURE CONTROL

# a) compliance
comp_controlN <- rbind.data.frame(
  difference_in_means(JUL_2014_ontime ~ pooled_124_6, data = fieldexN),
  difference_in_means(NOV_2014_ontime ~ pooled_124_6, data = fieldexN),
  difference_in_means(MAR_2015_ontime ~ pooled_124_6, data = fieldexN),
  difference_in_means(JUL_2015_ontime ~ pooled_124_6, data = fieldexN),
  difference_in_means(NOV_2015_ontime ~ pooled_124_6, data = fieldexN),
  difference_in_means(MAR_2016_ontime ~ pooled_124_6, data = fieldexN),
  difference_in_means(JUL_2016_ontime ~ pooled_124_6, data = fieldexN),
  difference_in_means(compliance_1416 ~ pooled_124_6, data = fieldexN)
)
comp_controlN$outcome <- "Paid on Time"
comp_controlN$control <- "Treatment vs Pure Control"
comp_controlN$type <- "Ineligibles"

# b) Intended compliance
intcomp_controlN <- rbind.data.frame(
  difference_in_means(JUL_2014_WEBACCESS ~ pooled_124_6, data = fieldexN),
  difference_in_means(NOV_2014_WEBACCESS ~ pooled_124_6, data = fieldexN),
  difference_in_means(MAR_2015_WEBACCESS ~ pooled_124_6, data = fieldexN),
  difference_in_means(JUL_2015_WEBACCESS ~ pooled_124_6, data = fieldexN),
  difference_in_means(NOV_2015_WEBACCESS ~ pooled_124_6, data = fieldexN),
  difference_in_means(MAR_2016_WEBACCESS ~ pooled_124_6, data = fieldexN),
  difference_in_means(JUL_2016_WEBACCESS ~ pooled_124_6, data = fieldexN),
  difference_in_means(intended_1416 ~ pooled_124_6, data = fieldexN)
)
intcomp_controlN$outcome <- "Web Access"
intcomp_controlN$control <- "Treatment vs Pure Control"
intcomp_controlN$type <- "Ineligibles"

#### TREATMENT VERSUS PLACEBO 

# a) Compliance 
comp_placeboN <- rbind.data.frame(
  difference_in_means(JUL_2014_ontime ~ pooled_124_0, data = fieldexN),
  difference_in_means(NOV_2014_ontime ~ pooled_124_0, data = fieldexN),
  difference_in_means(MAR_2015_ontime ~ pooled_124_0, data = fieldexN),
  difference_in_means(JUL_2015_ontime ~ pooled_124_0, data = fieldexN),
  difference_in_means(NOV_2015_ontime ~ pooled_124_0, data = fieldexN),
  difference_in_means(MAR_2016_ontime ~ pooled_124_0, data = fieldexN),
  difference_in_means(JUL_2016_ontime ~ pooled_124_0, data = fieldexN),
  difference_in_means(compliance_1416 ~ pooled_124_0, data = fieldexN)
)
comp_placeboN$outcome <- "Paid on Time"
comp_placeboN$control <- "Treatment vs Placebo"
comp_placeboN$type <- "Ineligibles"

# b) Intended compliance
intcomp_placeboN <- rbind.data.frame(
  difference_in_means(JUL_2014_WEBACCESS ~ pooled_124_0, data = fieldexN),
  difference_in_means(NOV_2014_WEBACCESS ~ pooled_124_0, data = fieldexN),
  difference_in_means(MAR_2015_WEBACCESS ~ pooled_124_0, data = fieldexN),
  difference_in_means(JUL_2015_WEBACCESS ~ pooled_124_0, data = fieldexN),
  difference_in_means(NOV_2015_WEBACCESS ~ pooled_124_0, data = fieldexN),
  difference_in_means(MAR_2016_WEBACCESS ~ pooled_124_0, data = fieldexN),
  difference_in_means(JUL_2016_WEBACCESS ~ pooled_124_0, data = fieldexN),
  difference_in_means(intended_1416 ~ pooled_124_0, data = fieldexN)
)
intcomp_placeboN$outcome <- "Web Access"
intcomp_placeboN$control <- "Treatment vs Placebo"
intcomp_placeboN$type <- "Ineligibles"

#### PURE CONTROL VERSUS PLACEBO 

# a) Compliance 
comp_control_plaN <- rbind.data.frame(
  difference_in_means(JUL_2014_ontime ~ pooled_0_6, data = fieldexN),
  difference_in_means(NOV_2014_ontime ~ pooled_0_6, data = fieldexN),
  difference_in_means(MAR_2015_ontime ~ pooled_0_6, data = fieldexN),
  difference_in_means(JUL_2015_ontime ~ pooled_0_6, data = fieldexN),
  difference_in_means(NOV_2015_ontime ~ pooled_0_6, data = fieldexN),
  difference_in_means(MAR_2016_ontime ~ pooled_0_6, data = fieldexN),
  difference_in_means(JUL_2016_ontime ~ pooled_0_6, data = fieldexN),
  difference_in_means(compliance_1416 ~ pooled_0_6, data = fieldexN)
)
comp_control_plaN$outcome <- "Paid on Time"
comp_control_plaN$control <- "Placebo vs Pure Control"
comp_control_plaN$type <- "Ineligibles"

# b) Intended compliance
intcomp_control_plaN <- rbind.data.frame(
  difference_in_means(JUL_2014_WEBACCESS ~ pooled_0_6, data = fieldexN),
  difference_in_means(NOV_2014_WEBACCESS ~ pooled_0_6, data = fieldexN),
  difference_in_means(MAR_2015_WEBACCESS ~ pooled_0_6, data = fieldexN),
  difference_in_means(JUL_2015_WEBACCESS ~ pooled_0_6, data = fieldexN),
  difference_in_means(NOV_2015_WEBACCESS ~ pooled_0_6, data = fieldexN),
  difference_in_means(MAR_2016_WEBACCESS ~ pooled_0_6, data = fieldexN),
  difference_in_means(JUL_2016_WEBACCESS ~ pooled_0_6, data = fieldexN),
  difference_in_means(intended_1416 ~ pooled_0_6, data = fieldexN)
)
intcomp_control_plaN$outcome <- "Web Access"
intcomp_control_plaN$control <- "Placebo vs Pure Control"
intcomp_control_plaN$type <- "Ineligibles"


## Combine and plot
plotdata <- rbind.data.frame(comp_placeboE, comp_controlE,
                             intcomp_placeboE, intcomp_controlE, 
                             comp_control_plaE, intcomp_control_plaE,
                             comp_placeboN, comp_controlN,
                             intcomp_placeboN, intcomp_controlN, 
                             comp_control_plaN, intcomp_control_plaN)
rm(comp_placeboE, comp_controlE,
   intcomp_placeboE, intcomp_controlE, 
   comp_control_plaE, intcomp_control_plaE,
   comp_placeboN, comp_controlN,
   intcomp_placeboN, intcomp_controlN, 
   comp_control_plaN, intcomp_control_plaN)

plotdata$time <- rep(1:8, 6)
#plotdata <- plotdata[plotdata$time!=8,]

plotdata$control <- as.factor(plotdata$control)

pd <- position_dodge(width = 0.6)

#Figure
ggplot(plotdata, aes(x=time, y=coefficients, group = control, shape = control)) + 
  facet_grid(type ~ outcome) + #, scales="free"
  geom_point(size=4.5, position=pd) +
  geom_hline(aes(yintercept=0), size=.5, linetype="dashed") + 
  geom_errorbar(aes(x=time,
                    ymin=conf.low, 
                    ymax=conf.high), 
                width=.15, size=1, position=pd) +
  xlab("Payments Since Receiving Flyer") + ylab("Average Causal Effect") + 
  theme_minimal() +
  scale_colour_manual(values = c("black", "black")) +
  scale_x_continuous(breaks=1:8,
                     labels=c(as.character(1:7), "pooled")) + 
  theme(plot.title = element_text(size = rel(1.2)),
        axis.text.x = element_text(size = rel(1)),
        axis.text.y = element_text(size = rel(1)),
        axis.title.y = element_text(size = rel(1)),
        axis.title.x = element_text(size = rel(1)),
        legend.text = element_text(size = rel(1)),
        strip.text.x = element_text(size = rel(1)),
        strip.text.y = element_text(size = rel(1)),
        legend.position = "bottom",
        legend.title=element_blank()) 

##############################################
message("APPENDIX TABLE A6")

# Field Experiment: Effects of Information About the Tax 
# Holiday on Compliance (weighted averages of block-specific 
# effects for eligible and ineligible taxpayers)


blockedATE <- rbind.data.frame(
  # Treatment versus placebo: compliance
  difference_in_means(compliance_1416 ~ pooled_124_0, blocks = type, 
                      se_type = "default", data = fieldex),
  # Treatment versus pure control: compliance
  difference_in_means(compliance_1416 ~ pooled_124_6, blocks = type, 
                      se_type = "default", data = fieldex),
  # Placebo versus pure control: compliance
  difference_in_means(compliance_1416 ~ pooled_0_6, blocks = type, 
                      se_type = "default", data = fieldex),
  # Treatment versus placebo: intended compliance
  difference_in_means(intended_1416 ~ pooled_124_0, blocks = type, 
                      se_type = "default", data = fieldex),
  # Treatment versus pure control: intended compliance
  difference_in_means(intended_1416 ~ pooled_124_6, blocks = type, 
                      se_type = "default", data = fieldex),
  # Placebo versus pure control: intended compliance
  difference_in_means(intended_1416 ~ pooled_0_6, blocks = type, 
                      se_type = "default", data = fieldex)
  
)
blockedATE$outcome <- rep(c("Paid on Time", "Web Access"), each=3)
blockedATE$control <- rep(c("Treatment vs Placebo","Treatment vs Pure Control", 
                            "Placebo vs Pure Control"), 2)

#Table
blockedATE[,c(1,2,4,6,11,18)]


##############################################
message("APPENDIX TABLE A7")

# Survey Experiment: Pooled Lottery vs. Discretionary 
# Benefit Conditions

survexp.results <- rbind.data.frame(
  difference_in_means(S1p4 ~ treat_discretion, data = survey_data),
  difference_in_means(S1p1 ~ treat_discretion, data = survey_data),
  difference_in_means(S1p3 ~ treat_discretion, data = survey_data),
  difference_in_means(S1p2 ~ treat_discretion, data = survey_data),
  difference_in_means(S1p5 ~ treat_discretion, data = survey_data)
)

survexp.results$outcome <- c("Rewards Go To The Same People As Always",
                             "Rewards Are A Waste Of Money",
                             "Worth It To Be Up To Date",
                             "Municipal Government Does A Good Job",
                             "Municipal Taxes Are Just")

survexp.results


#Threshold for FDR correction 
# get and order the nominal p-values
ordered.ps <- survexp.results$p.value[order(survexp.results$p.value,decreasing=F)]  
ordered.ps

comp <- (1:length(ordered.ps)/length(ordered.ps))*(.05)

FDR <- cbind(ordered.ps,comp,ordered.ps<=comp)
FDR

fdr <- max(FDR[,1][FDR[,3]==1])
fdr

#Threshold for Bonferroni correction
bonf <- 0.05/length(ordered.ps)
bonf


survexp.results$Bonf_reject <- NA
survexp.results$FDR_reject <- NA

survexp.results$Bonf_reject[survexp.results$p.value<=bonf] <- "yes"
survexp.results$Bonf_reject[survexp.results$p.value>bonf]  <- "no"

survexp.results$FDR_reject[survexp.results$p.value<=fdr] <- "yes"
survexp.results$FDR_reject[survexp.results$p.value>fdr]  <- "no"

#Table
survexp.results[c(1,3,5,4,2),c(11,1,2,4,6,16,17)]
